swr: avoid using exceptions for expected condition handling
[mesa.git] / src / gallium / drivers / swr / swr_state.cpp
index e7bf3618a7d8dbd22d4f22c525030a9c99f23e89..b0cbc21964ed33f9307c18c9ddfa421633df6471 100644 (file)
  * IN THE SOFTWARE.
  ***************************************************************************/
 
+// llvm redefines DEBUG
+#pragma push_macro("DEBUG")
+#undef DEBUG
+#include "JitManager.h"
+#pragma pop_macro("DEBUG")
+
 #include "common/os.h"
 #include "jit_api.h"
-#include "JitManager.h"
 #include "state_llvm.h"
 
 #include "gallivm/lp_bld_tgsi.h"
@@ -33,6 +38,7 @@
 #include "util/u_inlines.h"
 #include "util/u_helpers.h"
 #include "util/u_framebuffer.h"
+#include "util/u_viewport.h"
 
 #include "swr_state.h"
 #include "swr_context.h"
@@ -230,7 +236,7 @@ swr_create_sampler_state(struct pipe_context *pipe,
 
 static void
 swr_bind_sampler_states(struct pipe_context *pipe,
-                        unsigned shader,
+                        enum pipe_shader_type shader,
                         unsigned start,
                         unsigned num,
                         void **samplers)
@@ -239,7 +245,7 @@ swr_bind_sampler_states(struct pipe_context *pipe,
    unsigned i;
 
    assert(shader < PIPE_SHADER_TYPES);
-   assert(start + num <= Elements(ctx->samplers[shader]));
+   assert(start + num <= ARRAY_SIZE(ctx->samplers[shader]));
 
    /* set the new samplers */
    ctx->num_samplers[shader] = num;
@@ -277,7 +283,7 @@ swr_create_sampler_view(struct pipe_context *pipe,
 
 static void
 swr_set_sampler_views(struct pipe_context *pipe,
-                      unsigned shader,
+                      enum pipe_shader_type shader,
                       unsigned start,
                       unsigned num,
                       struct pipe_sampler_view **views)
@@ -288,7 +294,7 @@ swr_set_sampler_views(struct pipe_context *pipe,
    assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
 
    assert(shader < PIPE_SHADER_TYPES);
-   assert(start + num <= Elements(ctx->sampler_views[shader]));
+   assert(start + num <= ARRAY_SIZE(ctx->sampler_views[shader]));
 
    /* set the new sampler views */
    ctx->num_sampler_views[shader] = num;
@@ -317,8 +323,7 @@ static void *
 swr_create_vs_state(struct pipe_context *pipe,
                     const struct pipe_shader_state *vs)
 {
-   struct swr_vertex_shader *swr_vs =
-      (swr_vertex_shader *)CALLOC_STRUCT(swr_vertex_shader);
+   struct swr_vertex_shader *swr_vs = new swr_vertex_shader;
    if (!swr_vs)
       return NULL;
 
@@ -327,8 +332,6 @@ swr_create_vs_state(struct pipe_context *pipe,
 
    lp_build_tgsi_info(vs->tokens, &swr_vs->info);
 
-   swr_vs->func = swr_compile_vs(pipe, swr_vs);
-
    swr_vs->soState = {0};
 
    if (swr_vs->pipe.stream_output.num_outputs) {
@@ -368,7 +371,11 @@ swr_delete_vs_state(struct pipe_context *pipe, void *vs)
 {
    struct swr_vertex_shader *swr_vs = (swr_vertex_shader *)vs;
    FREE((void *)swr_vs->pipe.tokens);
-   FREE(vs);
+   struct swr_screen *screen = swr_screen(pipe->screen);
+   if (!swr_is_fence_pending(screen->flush_fence))
+      swr_fence_submit(swr_context(pipe), screen->flush_fence);
+   swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
+   delete swr_vs;
 }
 
 static void *
@@ -404,6 +411,10 @@ swr_delete_fs_state(struct pipe_context *pipe, void *fs)
 {
    struct swr_fragment_shader *swr_fs = (swr_fragment_shader *)fs;
    FREE((void *)swr_fs->pipe.tokens);
+   struct swr_screen *screen = swr_screen(pipe->screen);
+   if (!swr_is_fence_pending(screen->flush_fence))
+      swr_fence_submit(swr_context(pipe), screen->flush_fence);
+   swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
    delete swr_fs;
 }
 
@@ -412,13 +423,13 @@ static void
 swr_set_constant_buffer(struct pipe_context *pipe,
                         uint shader,
                         uint index,
-                        struct pipe_constant_buffer *cb)
+                        const struct pipe_constant_buffer *cb)
 {
    struct swr_context *ctx = swr_context(pipe);
    struct pipe_resource *constants = cb ? cb->buffer : NULL;
 
    assert(shader < PIPE_SHADER_TYPES);
-   assert(index < Elements(ctx->constants[shader]));
+   assert(index < ARRAY_SIZE(ctx->constants[shader]));
 
    /* note: reference counting */
    util_copy_constant_buffer(&ctx->constants[shader][index], cb);
@@ -444,6 +455,7 @@ swr_create_vertex_elements_state(struct pipe_context *pipe,
    assert(num_elements <= PIPE_MAX_ATTRIBS);
    velems = CALLOC_STRUCT(swr_vertex_element_state);
    if (velems) {
+      velems->fsState.bVertexIDOffsetEnable = true;
       velems->fsState.numAttribs = num_elements;
       for (unsigned i = 0; i < num_elements; i++) {
          // XXX: we should do this keyed on the VS usage info
@@ -573,6 +585,10 @@ swr_set_scissor_states(struct pipe_context *pipe,
    struct swr_context *ctx = swr_context(pipe);
 
    ctx->scissor = *scissor;
+   ctx->swr_scissor.xmin = scissor->minx;
+   ctx->swr_scissor.xmax = scissor->maxx;
+   ctx->swr_scissor.ymin = scissor->miny;
+   ctx->swr_scissor.ymax = scissor->maxy;
    ctx->dirty |= SWR_NEW_SCISSOR;
 }
 
@@ -666,13 +682,203 @@ swr_update_resource_status(struct pipe_context *pipe,
          swr_resource_read(ib->buffer);
    }
 
+   /* transform feedback buffers */
+   for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
+      struct pipe_stream_output_target *target = ctx->so_targets[i];
+      if (target && target->buffer)
+         swr_resource_write(target->buffer);
+   }
+
    /* texture sampler views */
-   for (uint32_t i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
+   for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
+      for (uint32_t i = 0; i < ctx->num_sampler_views[j]; i++) {
+         struct pipe_sampler_view *view = ctx->sampler_views[j][i];
+         if (view)
+            swr_resource_read(view->texture);
+      }
+   }
+
+   /* constant buffers */
+   for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
+      for (uint32_t i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
+         struct pipe_constant_buffer *cb = &ctx->constants[j][i];
+         if (cb->buffer)
+            swr_resource_read(cb->buffer);
+      }
+   }
+}
+
+static void
+swr_update_texture_state(struct swr_context *ctx,
+                         unsigned shader_type,
+                         unsigned num_sampler_views,
+                         swr_jit_texture *textures)
+{
+   for (unsigned i = 0; i < num_sampler_views; i++) {
       struct pipe_sampler_view *view =
-         ctx->sampler_views[PIPE_SHADER_FRAGMENT][i];
-      if (view)
-         swr_resource_read(view->texture);
+         ctx->sampler_views[shader_type][i];
+      struct swr_jit_texture *jit_tex = &textures[i];
+
+      memset(jit_tex, 0, sizeof(*jit_tex));
+      if (view) {
+         struct pipe_resource *res = view->texture;
+         struct swr_resource *swr_res = swr_resource(res);
+         SWR_SURFACE_STATE *swr = &swr_res->swr;
+         size_t *mip_offsets = swr_res->mip_offsets;
+         if (swr_res->has_depth && swr_res->has_stencil &&
+            !util_format_has_depth(util_format_description(view->format))) {
+            swr = &swr_res->secondary;
+            mip_offsets = swr_res->secondary_mip_offsets;
+         }
+
+         jit_tex->width = res->width0;
+         jit_tex->height = res->height0;
+         jit_tex->base_ptr = swr->pBaseAddress;
+         if (view->target != PIPE_BUFFER) {
+            jit_tex->first_level = view->u.tex.first_level;
+            jit_tex->last_level = view->u.tex.last_level;
+            if (view->target == PIPE_TEXTURE_3D)
+               jit_tex->depth = res->depth0;
+            else
+               jit_tex->depth =
+                  view->u.tex.last_layer - view->u.tex.first_layer + 1;
+            jit_tex->base_ptr += view->u.tex.first_layer *
+               swr->qpitch * swr->pitch;
+         } else {
+            unsigned view_blocksize = util_format_get_blocksize(view->format);
+            jit_tex->base_ptr += view->u.buf.offset;
+            jit_tex->width = view->u.buf.size / view_blocksize;
+            jit_tex->depth = 1;
+         }
+
+         for (unsigned level = jit_tex->first_level;
+              level <= jit_tex->last_level;
+              level++) {
+            jit_tex->row_stride[level] = swr->pitch;
+            jit_tex->img_stride[level] = swr->qpitch * swr->pitch;
+            jit_tex->mip_offsets[level] = mip_offsets[level];
+         }
+      }
+   }
+}
+
+static void
+swr_update_sampler_state(struct swr_context *ctx,
+                         unsigned shader_type,
+                         unsigned num_samplers,
+                         swr_jit_sampler *samplers)
+{
+   for (unsigned i = 0; i < num_samplers; i++) {
+      const struct pipe_sampler_state *sampler =
+         ctx->samplers[shader_type][i];
+
+      if (sampler) {
+         samplers[i].min_lod = sampler->min_lod;
+         samplers[i].max_lod = sampler->max_lod;
+         samplers[i].lod_bias = sampler->lod_bias;
+         COPY_4V(samplers[i].border_color, sampler->border_color.f);
+      }
+   }
+}
+
+static void
+swr_update_constants(struct swr_context *ctx, enum pipe_shader_type shaderType)
+{
+   swr_draw_context *pDC = &ctx->swrDC;
+
+   const float **constant;
+   uint32_t *num_constants;
+   struct swr_scratch_space *scratch;
+
+   switch (shaderType) {
+   case PIPE_SHADER_VERTEX:
+      constant = pDC->constantVS;
+      num_constants = pDC->num_constantsVS;
+      scratch = &ctx->scratch->vs_constants;
+      break;
+   case PIPE_SHADER_FRAGMENT:
+      constant = pDC->constantFS;
+      num_constants = pDC->num_constantsFS;
+      scratch = &ctx->scratch->fs_constants;
+      break;
+   default:
+      debug_printf("Unsupported shader type constants\n");
+      return;
+   }
+
+   for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
+      const pipe_constant_buffer *cb = &ctx->constants[shaderType][i];
+      num_constants[i] = cb->buffer_size;
+      if (cb->buffer) {
+         constant[i] =
+            (const float *)(swr_resource_data(cb->buffer) +
+                            cb->buffer_offset);
+      } else {
+         /* Need to copy these constants to scratch space */
+         if (cb->user_buffer && cb->buffer_size) {
+            const void *ptr =
+               ((const uint8_t *)cb->user_buffer + cb->buffer_offset);
+            uint32_t size = AlignUp(cb->buffer_size, 4);
+            ptr = swr_copy_to_scratch_space(ctx, scratch, ptr, size);
+            constant[i] = (const float *)ptr;
+         }
+      }
+   }
+}
+
+static bool
+swr_change_rt(struct swr_context *ctx,
+              unsigned attachment,
+              const struct pipe_surface *sf)
+{
+   swr_draw_context *pDC = &ctx->swrDC;
+   struct SWR_SURFACE_STATE *rt = &pDC->renderTargets[attachment];
+
+   /* Do nothing if the render target hasn't changed */
+   if ((!sf || !sf->texture) && rt->pBaseAddress == nullptr)
+      return false;
+
+   /* Deal with disabling RT up front */
+   if (!sf || !sf->texture) {
+      /* If detaching attachment, mark tiles as RESOLVED so core
+       * won't try to load from non-existent target. */
+      swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_RESOLVED);
+      *rt = {0};
+      return true;
+   }
+
+   const struct swr_resource *swr = swr_resource(sf->texture);
+   const SWR_SURFACE_STATE *swr_surface = &swr->swr;
+   SWR_FORMAT fmt = mesa_to_swr_format(sf->format);
+
+   if (attachment == SWR_ATTACHMENT_STENCIL && swr->secondary.pBaseAddress) {
+      swr_surface = &swr->secondary;
+      fmt = swr_surface->format;
    }
+
+   if (rt->pBaseAddress == swr_surface->pBaseAddress &&
+       rt->format == fmt &&
+       rt->lod == sf->u.tex.level &&
+       rt->arrayIndex == sf->u.tex.first_layer)
+      return false;
+
+   bool need_fence = false;
+
+   /* StoreTile for changed target */
+   if (rt->pBaseAddress) {
+      /* If changing attachment to a new target, mark tiles as
+       * INVALID so they are reloaded from surface. */
+      swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_INVALID);
+      need_fence = true;
+   }
+
+   /* Make new attachment */
+   *rt = *swr_surface;
+   rt->format = fmt;
+   rt->lod = sf->u.tex.level;
+   rt->arrayIndex = sf->u.tex.first_layer;
+
+   return need_fence;
 }
 
 void
@@ -682,6 +888,10 @@ swr_update_derived(struct pipe_context *pipe,
    struct swr_context *ctx = swr_context(pipe);
    struct swr_screen *screen = swr_screen(ctx->pipe.screen);
 
+   /* Update screen->pipe to current pipe context. */
+   if (screen->pipe != pipe)
+      screen->pipe = pipe;
+
    /* Any state that requires dirty flags to be re-triggered sets this mask */
    /* For example, user_buffer vertex and index buffers. */
    unsigned post_update_dirty_flags = 0;
@@ -689,64 +899,30 @@ swr_update_derived(struct pipe_context *pipe,
    /* Render Targets */
    if (ctx->dirty & SWR_NEW_FRAMEBUFFER) {
       struct pipe_framebuffer_state *fb = &ctx->framebuffer;
-      SWR_SURFACE_STATE *new_attachment[SWR_NUM_ATTACHMENTS] = {0};
-      UINT i;
+      const struct util_format_description *desc = NULL;
+      bool need_fence = false;
 
       /* colorbuffer targets */
-      if (fb->nr_cbufs)
-         for (i = 0; i < fb->nr_cbufs; ++i)
-            if (fb->cbufs[i]) {
-               struct swr_resource *colorBuffer =
-                  swr_resource(fb->cbufs[i]->texture);
-               new_attachment[SWR_ATTACHMENT_COLOR0 + i] = &colorBuffer->swr;
-            }
-
-      /* depth/stencil target */
-      if (fb->zsbuf) {
-         struct swr_resource *depthStencilBuffer =
-            swr_resource(fb->zsbuf->texture);
-         if (depthStencilBuffer->has_depth) {
-            new_attachment[SWR_ATTACHMENT_DEPTH] = &depthStencilBuffer->swr;
-
-            if (depthStencilBuffer->has_stencil)
-               new_attachment[SWR_ATTACHMENT_STENCIL] =
-                  &depthStencilBuffer->secondary;
-
-         } else if (depthStencilBuffer->has_stencil)
-            new_attachment[SWR_ATTACHMENT_STENCIL] = &depthStencilBuffer->swr;
+      if (fb->nr_cbufs) {
+         for (unsigned i = 0; i < fb->nr_cbufs; ++i)
+            need_fence |= swr_change_rt(
+                  ctx, SWR_ATTACHMENT_COLOR0 + i, fb->cbufs[i]);
       }
+      for (unsigned i = fb->nr_cbufs; i < SWR_NUM_RENDERTARGETS; ++i)
+         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_COLOR0 + i, NULL);
 
-      /* Make the attachment updates */
-      swr_draw_context *pDC = &ctx->swrDC;
-      SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
-      unsigned need_fence = FALSE;
-      for (i = 0; i < SWR_NUM_ATTACHMENTS; i++) {
-         void *new_base = nullptr;
-         if (new_attachment[i])
-            new_base = new_attachment[i]->pBaseAddress;
-
-         /* StoreTile for changed target */
-         if (renderTargets[i].pBaseAddress != new_base) {
-            if (renderTargets[i].pBaseAddress) {
-               /* If changing attachment to a new target, mark tiles as
-                * INVALID so they are reloaded from surface.
-                * If detaching attachment, mark tiles as RESOLVED so core
-                * won't try to load from non-existent target. */
-               enum SWR_TILE_STATE post_state = (new_attachment[i]
-                  ? SWR_TILE_INVALID : SWR_TILE_RESOLVED);
-               swr_store_render_target(pipe, i, post_state);
-
-               need_fence |= TRUE;
-            }
+      /* depth/stencil target */
+      if (fb->zsbuf)
+         desc = util_format_description(fb->zsbuf->format);
+      if (fb->zsbuf && util_format_has_depth(desc))
+         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, fb->zsbuf);
+      else
+         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, NULL);
 
-            /* Make new attachment */
-            if (new_attachment[i])
-               renderTargets[i] = *new_attachment[i];
-            else
-               if (renderTargets[i].pBaseAddress)
-                  renderTargets[i] = {0};
-         }
-      }
+      if (fb->zsbuf && util_format_has_stencil(desc))
+         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, fb->zsbuf);
+      else
+         need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, NULL);
 
       /* This fence ensures any attachment changes are resolved before the
        * next draw */
@@ -755,7 +931,9 @@ swr_update_derived(struct pipe_context *pipe,
    }
 
    /* Raster state */
-   if (ctx->dirty & (SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
+   if (ctx->dirty & (SWR_NEW_RASTERIZER |
+                     SWR_NEW_VS | // clipping
+                     SWR_NEW_FRAMEBUFFER)) {
       pipe_rasterizer_state *rasterizer = ctx->rasterizer;
       pipe_framebuffer_state *fb = &ctx->framebuffer;
 
@@ -782,7 +960,7 @@ swr_update_derived(struct pipe_context *pipe,
       rastState->msaaRastEnable = false;
       rastState->rastMode = SWR_MSAA_RASTMODE_OFF_PIXEL;
       rastState->sampleCount = SWR_MULTISAMPLE_1X;
-      rastState->bForcedSampleCount = false;
+      rastState->forcedSampleCount = false;
 
       bool do_offset = false;
       switch (rasterizer->fill_front) {
@@ -811,16 +989,22 @@ swr_update_derived(struct pipe_context *pipe,
          rastState->depthFormat = swr_resource(zb->texture)->swr.format;
 
       rastState->depthClipEnable = rasterizer->depth_clip;
+      rastState->clipHalfZ = rasterizer->clip_halfz;
+
+      rastState->clipDistanceMask =
+         ctx->vs->info.base.num_written_clipdistance ?
+         ctx->vs->info.base.clipdist_writemask & rasterizer->clip_plane_enable :
+         rasterizer->clip_plane_enable;
+
+      rastState->cullDistanceMask =
+         ctx->vs->info.base.culldist_writemask << ctx->vs->info.base.num_written_clipdistance;
 
       SwrSetRastState(ctx->swrContext, rastState);
    }
 
    /* Scissor */
    if (ctx->dirty & SWR_NEW_SCISSOR) {
-      pipe_scissor_state *scissor = &ctx->scissor;
-      BBOX bbox(scissor->miny, scissor->maxy,
-                scissor->minx, scissor->maxx);
-      SwrSetScissorRects(ctx->swrContext, 1, &bbox);
+      SwrSetScissorRects(ctx->swrContext, 1, &ctx->swr_scissor);
    }
 
    /* Viewport */
@@ -831,33 +1015,34 @@ swr_update_derived(struct pipe_context *pipe,
       pipe_rasterizer_state *rasterizer = ctx->rasterizer;
 
       SWR_VIEWPORT *vp = &ctx->derived.vp;
-      SWR_VIEWPORT_MATRIX *vpm = &ctx->derived.vpm;
+      SWR_VIEWPORT_MATRICES *vpm = &ctx->derived.vpm;
 
       vp->x = state->translate[0] - state->scale[0];
-      vp->width = state->translate[0] + state->scale[0];
+      vp->width = 2 * state->scale[0];
       vp->y = state->translate[1] - fabs(state->scale[1]);
-      vp->height = state->translate[1] + fabs(state->scale[1]);
-      if (rasterizer->clip_halfz == 0) {
-         vp->minZ = state->translate[2] - state->scale[2];
-         vp->maxZ = state->translate[2] + state->scale[2];
-      } else {
-         vp->minZ = state->translate[2];
-         vp->maxZ = state->translate[2] + state->scale[2];
-      }
+      vp->height = 2 * fabs(state->scale[1]);
+      util_viewport_zmin_zmax(state, rasterizer->clip_halfz,
+                              &vp->minZ, &vp->maxZ);
 
-      vpm->m00 = state->scale[0];
-      vpm->m11 = state->scale[1];
-      vpm->m22 = state->scale[2];
-      vpm->m30 = state->translate[0];
-      vpm->m31 = state->translate[1];
-      vpm->m32 = state->translate[2];
+      vpm->m00[0] = state->scale[0];
+      vpm->m11[0] = state->scale[1];
+      vpm->m22[0] = state->scale[2];
+      vpm->m30[0] = state->translate[0];
+      vpm->m31[0] = state->translate[1];
+      vpm->m32[0] = state->translate[2];
 
       /* Now that the matrix is calculated, clip the view coords to screen
        * size.  OpenGL allows for -ve x,y in the viewport. */
-      vp->x = std::max(vp->x, 0.0f);
-      vp->y = std::max(vp->y, 0.0f);
-      vp->width = std::min(vp->width, (float)fb->width);
-      vp->height = std::min(vp->height, (float)fb->height);
+      if (vp->x < 0.0f) {
+         vp->width += vp->x;
+         vp->x = 0.0f;
+      }
+      if (vp->y < 0.0f) {
+         vp->height += vp->y;
+         vp->y = 0.0f;
+      }
+      vp->width = std::min(vp->width, (float)fb->width - vp->x);
+      vp->height = std::min(vp->height, (float)fb->height - vp->y);
 
       SwrSetViewports(ctx->swrContext, 1, vp, vpm);
    }
@@ -865,14 +1050,39 @@ swr_update_derived(struct pipe_context *pipe,
    /* Set vertex & index buffers */
    /* (using draw info if called by swr_draw_vbo) */
    if (ctx->dirty & SWR_NEW_VERTEX) {
-      uint32_t size, pitch, max_vertex, partial_inbounds;
+      uint32_t size, pitch, max_vertex, partial_inbounds, scratch_total;
       const uint8_t *p_data;
+      uint8_t *scratch = NULL;
 
       /* If being called by swr_draw_vbo, copy draw details */
       struct pipe_draw_info info = {0};
       if (p_draw_info)
          info = *p_draw_info;
 
+      /* We must get all the scratch space in one go */
+      scratch_total = 0;
+      for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
+         struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
+
+         if (!vb->user_buffer)
+            continue;
+
+         if (vb->stride) {
+            size = (info.max_index - info.min_index + 1) * vb->stride;
+         } else {
+            /* pitch = 0, means constant value
+             * set size to 1 vertex */
+            size = ctx->velems->stream_pitch[i];
+         }
+
+         scratch_total += AlignUp(size, 4);
+      }
+
+      if (scratch_total) {
+         scratch = (uint8_t *)swr_copy_to_scratch_space(
+               ctx, &ctx->scratch->vertex_buffer, NULL, scratch_total);
+      }
+
       /* vertex buffers */
       SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
       for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
@@ -887,8 +1097,7 @@ swr_update_derived(struct pipe_context *pipe,
             max_vertex = size / pitch;
             partial_inbounds = size % pitch;
 
-            p_data = (const uint8_t *)swr_resource_data(vb->buffer)
-               + vb->buffer_offset;
+            p_data = swr_resource_data(vb->buffer) + vb->buffer_offset;
          } else {
             /* Client buffer
              * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
@@ -910,8 +1119,9 @@ swr_update_derived(struct pipe_context *pipe,
             size = AlignUp(size, 4);
             const void *ptr = (const uint8_t *) vb->user_buffer
                + info.min_index * pitch;
-            ptr = swr_copy_to_scratch_space(
-               ctx, &ctx->scratch->vertex_buffer, ptr, size);
+            memcpy(scratch, ptr, size);
+            ptr = scratch;
+            scratch += size;
             p_data = (const uint8_t *)ptr - info.min_index * pitch;
          }
 
@@ -940,8 +1150,7 @@ swr_update_derived(struct pipe_context *pipe,
              * size is based on buffer->width0 rather than info.count
              * to prevent having to validate VBO on each draw */
             size = ib->buffer->width0;
-            p_data =
-               (const uint8_t *)swr_resource_data(ib->buffer) + ib->offset;
+            p_data = swr_resource_data(ib->buffer) + ib->offset;
          } else {
             /* Client buffer
              * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
@@ -974,22 +1183,50 @@ swr_update_derived(struct pipe_context *pipe,
    }
 
    /* VertexShader */
-   if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_FRAMEBUFFER)) {
-      SwrSetVertexFunc(ctx->swrContext, ctx->vs->func);
+   if (ctx->dirty & (SWR_NEW_VS |
+                     SWR_NEW_RASTERIZER | // for clip planes
+                     SWR_NEW_SAMPLER |
+                     SWR_NEW_SAMPLER_VIEW |
+                     SWR_NEW_FRAMEBUFFER)) {
+      swr_jit_vs_key key;
+      swr_generate_vs_key(key, ctx, ctx->vs);
+      auto search = ctx->vs->map.find(key);
+      PFN_VERTEX_FUNC func;
+      if (search != ctx->vs->map.end()) {
+         func = search->second->shader;
+      } else {
+         func = swr_compile_vs(ctx, key);
+      }
+      SwrSetVertexFunc(ctx->swrContext, func);
+
+      /* JIT sampler state */
+      if (ctx->dirty & SWR_NEW_SAMPLER) {
+         swr_update_sampler_state(ctx,
+                                  PIPE_SHADER_VERTEX,
+                                  key.nr_samplers,
+                                  ctx->swrDC.samplersVS);
+      }
+
+      /* JIT sampler view state */
+      if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
+         swr_update_texture_state(ctx,
+                                  PIPE_SHADER_VERTEX,
+                                  key.nr_sampler_views,
+                                  ctx->swrDC.texturesVS);
+      }
    }
 
-   swr_jit_key key;
+   /* FragmentShader */
    if (ctx->dirty & (SWR_NEW_FS | SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW
                      | SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
-      memset(&key, 0, sizeof(key));
+      swr_jit_fs_key key;
       swr_generate_fs_key(key, ctx, ctx->fs);
       auto search = ctx->fs->map.find(key);
       PFN_PIXEL_KERNEL func;
       if (search != ctx->fs->map.end()) {
-         func = search->second;
+         func = search->second->shader;
       } else {
          func = swr_compile_fs(ctx, key);
-         ctx->fs->map.insert(std::make_pair(key, func));
       }
       SWR_PS_STATE psState = {0};
       psState.pfnPixelShader = func;
@@ -1031,104 +1268,33 @@ swr_update_derived(struct pipe_context *pipe,
       psState.usesUAV = false; // XXX
       psState.forceEarlyZ = false;
       SwrSetPixelShaderState(ctx->swrContext, &psState);
-   }
-
-   /* JIT sampler state */
-   if (ctx->dirty & SWR_NEW_SAMPLER) {
-      swr_draw_context *pDC = &ctx->swrDC;
-
-      for (unsigned i = 0; i < key.nr_samplers; i++) {
-         const struct pipe_sampler_state *sampler =
-            ctx->samplers[PIPE_SHADER_FRAGMENT][i];
 
-         if (sampler) {
-            pDC->samplersFS[i].min_lod = sampler->min_lod;
-            pDC->samplersFS[i].max_lod = sampler->max_lod;
-            pDC->samplersFS[i].lod_bias = sampler->lod_bias;
-            COPY_4V(pDC->samplersFS[i].border_color, sampler->border_color.f);
-         }
+      /* JIT sampler state */
+      if (ctx->dirty & SWR_NEW_SAMPLER) {
+         swr_update_sampler_state(ctx,
+                                  PIPE_SHADER_FRAGMENT,
+                                  key.nr_samplers,
+                                  ctx->swrDC.samplersFS);
       }
-   }
 
-   /* JIT sampler view state */
-   if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
-      swr_draw_context *pDC = &ctx->swrDC;
-
-      for (unsigned i = 0; i < key.nr_sampler_views; i++) {
-         struct pipe_sampler_view *view =
-            ctx->sampler_views[PIPE_SHADER_FRAGMENT][i];
-
-         if (view) {
-            struct pipe_resource *res = view->texture;
-            struct swr_resource *swr_res = swr_resource(res);
-            struct swr_jit_texture *jit_tex = &pDC->texturesFS[i];
-            memset(jit_tex, 0, sizeof(*jit_tex));
-            jit_tex->width = res->width0;
-            jit_tex->height = res->height0;
-            jit_tex->depth = res->depth0;
-            jit_tex->first_level = view->u.tex.first_level;
-            jit_tex->last_level = view->u.tex.last_level;
-            jit_tex->base_ptr = swr_res->swr.pBaseAddress;
-
-            for (unsigned level = jit_tex->first_level;
-                 level <= jit_tex->last_level;
-                 level++) {
-               jit_tex->row_stride[level] = swr_res->row_stride[level];
-               jit_tex->img_stride[level] = swr_res->img_stride[level];
-               jit_tex->mip_offsets[level] = swr_res->mip_offsets[level];
-            }
-         }
+      /* JIT sampler view state */
+      if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
+         swr_update_texture_state(ctx,
+                                  PIPE_SHADER_FRAGMENT,
+                                  key.nr_sampler_views,
+                                  ctx->swrDC.texturesFS);
       }
    }
 
+
    /* VertexShader Constants */
    if (ctx->dirty & SWR_NEW_VSCONSTANTS) {
-      swr_draw_context *pDC = &ctx->swrDC;
-
-      for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
-         const pipe_constant_buffer *cb =
-            &ctx->constants[PIPE_SHADER_VERTEX][i];
-         pDC->num_constantsVS[i] = cb->buffer_size;
-         if (cb->buffer)
-            pDC->constantVS[i] =
-               (const float *)((const uint8_t *)cb->buffer + cb->buffer_offset);
-         else {
-            /* Need to copy these constants to scratch space */
-            if (cb->user_buffer && cb->buffer_size) {
-               const void *ptr =
-                  ((const uint8_t *)cb->user_buffer + cb->buffer_offset);
-               uint32_t size = AlignUp(cb->buffer_size, 4);
-               ptr = swr_copy_to_scratch_space(
-                  ctx, &ctx->scratch->vs_constants, ptr, size);
-               pDC->constantVS[i] = (const float *)ptr;
-            }
-         }
-      }
+      swr_update_constants(ctx, PIPE_SHADER_VERTEX);
    }
 
    /* FragmentShader Constants */
    if (ctx->dirty & SWR_NEW_FSCONSTANTS) {
-      swr_draw_context *pDC = &ctx->swrDC;
-
-      for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
-         const pipe_constant_buffer *cb =
-            &ctx->constants[PIPE_SHADER_FRAGMENT][i];
-         pDC->num_constantsFS[i] = cb->buffer_size;
-         if (cb->buffer)
-            pDC->constantFS[i] =
-               (const float *)((const uint8_t *)cb->buffer + cb->buffer_offset);
-         else {
-            /* Need to copy these constants to scratch space */
-            if (cb->user_buffer && cb->buffer_size) {
-               const void *ptr =
-                  ((const uint8_t *)cb->user_buffer + cb->buffer_offset);
-               uint32_t size = AlignUp(cb->buffer_size, 4);
-               ptr = swr_copy_to_scratch_space(
-                  ctx, &ctx->scratch->fs_constants, ptr, size);
-               pDC->constantFS[i] = (const float *)ptr;
-            }
-         }
-      }
+      swr_update_constants(ctx, PIPE_SHADER_FRAGMENT);
    }
 
    /* Depth/stencil state */
@@ -1136,6 +1302,7 @@ swr_update_derived(struct pipe_context *pipe,
       struct pipe_depth_state *depth = &(ctx->depth_stencil->depth);
       struct pipe_stencil_state *stencil = ctx->depth_stencil->stencil;
       SWR_DEPTH_STENCIL_STATE depthStencilState = {{0}};
+      SWR_DEPTH_BOUNDS_STATE depthBoundsState = {0};
 
       /* XXX, incomplete.  Need to flesh out stencil & alpha test state
       struct pipe_stencil_state *front_stencil =
@@ -1182,6 +1349,11 @@ swr_update_derived(struct pipe_context *pipe,
       depthStencilState.depthTestFunc = swr_convert_depth_func(depth->func);
       depthStencilState.depthWriteEnable = depth->writemask;
       SwrSetDepthStencilState(ctx->swrContext, &depthStencilState);
+
+      depthBoundsState.depthBoundsTestEnable = depth->bounds_test;
+      depthBoundsState.depthBoundsTestMinValue = depth->bounds_min;
+      depthBoundsState.depthBoundsTestMaxValue = depth->bounds_max;
+      SwrSetDepthBoundsState(ctx->swrContext, &depthBoundsState);
    }
 
    /* Blend State */
@@ -1230,8 +1402,18 @@ swr_update_derived(struct pipe_context *pipe,
                    &ctx->blend->compileState[target],
                    sizeof(compileState.blendState));
 
+            const SWR_FORMAT_INFO& info = GetFormatInfo(compileState.format);
+            if (compileState.blendState.logicOpEnable &&
+                ((info.type[0] == SWR_TYPE_FLOAT) || info.isSRGB)) {
+               compileState.blendState.logicOpEnable = false;
+            }
+
+            if (info.type[0] == SWR_TYPE_SINT || info.type[0] == SWR_TYPE_UINT)
+               compileState.blendState.blendEnable = false;
+
             if (compileState.blendState.blendEnable == false &&
-                compileState.blendState.logicOpEnable == false) {
+                compileState.blendState.logicOpEnable == false &&
+                ctx->depth_stencil->alpha.enabled == 0) {
                SwrSetBlendFunc(ctx->swrContext, target, NULL);
                continue;
             }
@@ -1239,7 +1421,12 @@ swr_update_derived(struct pipe_context *pipe,
             compileState.desc.alphaTestEnable =
                ctx->depth_stencil->alpha.enabled;
             compileState.desc.independentAlphaBlendEnable =
-               ctx->blend->pipe.independent_blend_enable;
+               (compileState.blendState.sourceBlendFactor !=
+                compileState.blendState.sourceAlphaBlendFactor) ||
+               (compileState.blendState.destBlendFactor !=
+                compileState.blendState.destAlphaBlendFactor) ||
+               (compileState.blendState.colorBlendFunc !=
+                compileState.blendState.alphaBlendFunc);
             compileState.desc.alphaToCoverageEnable =
                ctx->blend->pipe.alpha_to_coverage;
             compileState.desc.sampleMaskEnable = 0; // XXX
@@ -1249,6 +1436,8 @@ swr_update_derived(struct pipe_context *pipe,
                swr_convert_depth_func(ctx->depth_stencil->alpha.func);
             compileState.alphaTestFormat = ALPHA_TEST_FLOAT32; // xxx
 
+            compileState.Canonicalize();
+            
             PFN_BLEND_JIT_FUNC func = NULL;
             auto search = ctx->blendJIT->find(compileState);
             if (search != ctx->blendJIT->end()) {
@@ -1293,28 +1482,36 @@ swr_update_derived(struct pipe_context *pipe,
       }
    }
 
-   uint32_t linkage = ctx->vs->linkageMask;
-   if (ctx->rasterizer->sprite_coord_enable)
-      linkage |= (1 << ctx->vs->info.base.num_outputs);
-
-   SwrSetLinkage(ctx->swrContext, linkage, NULL);
-
-   // set up frontend state
-   SWR_FRONTEND_STATE feState = {0};
-   SwrSetFrontendState(ctx->swrContext, &feState);
+   if (ctx->dirty & SWR_NEW_CLIP) {
+      // shader exporting clip distances overrides all user clip planes
+      if (ctx->rasterizer->clip_plane_enable &&
+          !ctx->vs->info.base.num_written_clipdistance)
+      {
+         swr_draw_context *pDC = &ctx->swrDC;
+         memcpy(pDC->userClipPlanes,
+                ctx->clip.ucp,
+                sizeof(pDC->userClipPlanes));
+      }
+   }
 
    // set up backend state
    SWR_BACKEND_STATE backendState = {0};
-   backendState.numAttributes = 1;
-   backendState.numComponents[0] = 4;
-   backendState.constantInterpolationMask = ctx->fs->constantMask;
+   backendState.numAttributes =
+      ctx->vs->info.base.num_outputs - 1 +
+      (ctx->rasterizer->sprite_coord_enable ? 1 : 0);
+   for (unsigned i = 0; i < backendState.numAttributes; i++)
+      backendState.numComponents[i] = 4;
+   backendState.constantInterpolationMask =
+      ctx->rasterizer->flatshade ?
+      ctx->fs->flatConstantMask :
+      ctx->fs->constantMask;
    backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask;
 
    SwrSetBackendState(ctx->swrContext, &backendState);
 
    /* Ensure that any in-progress attachment change StoreTiles finish */
    if (swr_is_fence_pending(screen->flush_fence))
-      swr_fence_finish(pipe->screen, screen->flush_fence, 0);
+      swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
 
    /* Finally, update the in-use status of all resources involved in draw */
    swr_update_resource_status(pipe, p_draw_info);