gallium/swr: add OpenSWR driver
authorTim Rowley <timothy.o.rowley@intel.com>
Tue, 16 Feb 2016 23:27:28 +0000 (17:27 -0600)
committerTim Rowley <timothy.o.rowley@intel.com>
Thu, 3 Mar 2016 00:38:41 +0000 (18:38 -0600)
OpenSWR is a new software rasterizer for x86 processors designed
for high performance and high scalability on visualization workloads.

Acked-by: Roland Scheidegger <sroland@vmware.com>
Acked-by: Jose Fonseca <jfonseca@vmware.com>
23 files changed:
src/gallium/drivers/swr/swr_clear.cpp [new file with mode: 0644]
src/gallium/drivers/swr/swr_context.cpp [new file with mode: 0644]
src/gallium/drivers/swr/swr_context.h [new file with mode: 0644]
src/gallium/drivers/swr/swr_context_llvm.h [new file with mode: 0644]
src/gallium/drivers/swr/swr_draw.cpp [new file with mode: 0644]
src/gallium/drivers/swr/swr_fence.cpp [new file with mode: 0644]
src/gallium/drivers/swr/swr_fence.h [new file with mode: 0644]
src/gallium/drivers/swr/swr_loader.cpp [new file with mode: 0644]
src/gallium/drivers/swr/swr_memory.h [new file with mode: 0644]
src/gallium/drivers/swr/swr_public.h [new file with mode: 0644]
src/gallium/drivers/swr/swr_query.cpp [new file with mode: 0644]
src/gallium/drivers/swr/swr_query.h [new file with mode: 0644]
src/gallium/drivers/swr/swr_resource.h [new file with mode: 0644]
src/gallium/drivers/swr/swr_scratch.cpp [new file with mode: 0644]
src/gallium/drivers/swr/swr_scratch.h [new file with mode: 0644]
src/gallium/drivers/swr/swr_screen.cpp [new file with mode: 0644]
src/gallium/drivers/swr/swr_screen.h [new file with mode: 0644]
src/gallium/drivers/swr/swr_shader.cpp [new file with mode: 0644]
src/gallium/drivers/swr/swr_shader.h [new file with mode: 0644]
src/gallium/drivers/swr/swr_state.cpp [new file with mode: 0644]
src/gallium/drivers/swr/swr_state.h [new file with mode: 0644]
src/gallium/drivers/swr/swr_tex_sample.cpp [new file with mode: 0644]
src/gallium/drivers/swr/swr_tex_sample.h [new file with mode: 0644]

diff --git a/src/gallium/drivers/swr/swr_clear.cpp b/src/gallium/drivers/swr/swr_clear.cpp
new file mode 100644 (file)
index 0000000..9027f84
--- /dev/null
@@ -0,0 +1,142 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "swr_context.h"
+#include "swr_query.h"
+
+/**
+ * pipe_context::clear implementation (installed by swr_clear_init()).
+ *
+ * Builds an SWR clear mask from the PIPE_CLEAR_* bits in \p buffers,
+ * restricted to attachments that are bound in the context's framebuffer
+ * state, then calls SwrClearRenderTarget() with \p color, \p depth and
+ * \p stencil.  Only color buffer 0 is considered for color clears (the
+ * multi-target path is compiled out).
+ *
+ * Returns without doing anything when swr_check_render_cond() returns
+ * false.
+ */
+static void
+swr_clear(struct pipe_context *pipe,
+          unsigned buffers,
+          const union pipe_color_union *color,
+          double depth,
+          unsigned stencil)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   struct pipe_framebuffer_state *fb = &ctx->framebuffer;
+
+   UINT clearMask = 0;
+
+   if (!swr_check_render_cond(pipe))
+      return;
+
+   /* Re-derive SWR state if any SWR_NEW_* flag is pending. */
+   if (ctx->dirty)
+      swr_update_derived(ctx);
+
+/* Update clearMask/targetMask */
+#if 0 /* XXX SWR currently only clears SWR_ATTACHMENT_COLOR0, don't bother   \
+         checking others yet. */
+   if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
+      UINT i;
+      for (i = 0; i < fb->nr_cbufs; ++i)
+         if (fb->cbufs[i])
+            clearMask |= (SWR_CLEAR_COLOR0 << i);
+   }
+#else
+   if (buffers & PIPE_CLEAR_COLOR && fb->cbufs[0])
+      clearMask |= SWR_CLEAR_COLOR;
+#endif
+
+   /* Depth and stencil share the single zsbuf attachment. */
+   if (buffers & PIPE_CLEAR_DEPTH && fb->zsbuf)
+      clearMask |= SWR_CLEAR_DEPTH;
+
+   if (buffers & PIPE_CLEAR_STENCIL && fb->zsbuf)
+      clearMask |= SWR_CLEAR_STENCIL;
+
+#if 0 // XXX HACK, override clear color alpha. On ubuntu, clears are
+      // transparent.
+   ((union pipe_color_union *)color)->f[3] = 1.0; /* cast off your const'd-ness */
+#endif
+
+   /* Reset viewport to full framebuffer width/height before clear, then
+    * restore it  */
+   /* Scissor affects clear, viewport should not */
+   /* NOTE(review): nothing here restores the viewport directly; setting
+    * SWR_NEW_VIEWPORT presumably makes a later swr_update_derived() call
+    * reprogram it -- confirm. */
+   ctx->dirty |= SWR_NEW_VIEWPORT;
+   SWR_VIEWPORT vp = {0};
+   vp.width = ctx->framebuffer.width;
+   vp.height = ctx->framebuffer.height;
+   SwrSetViewports(ctx->swrContext, 1, &vp, NULL);
+
+   /* Copy ctx->swrDC into the SWR core's private context state before
+    * issuing the clear. */
+       swr_update_draw_context(ctx);
+   SwrClearRenderTarget(ctx->swrContext, clearMask, color->f, depth, stencil);
+}
+
+
+#if 0 // XXX, these don't get called. how to get these called?  Do we need
+      // them?  Docs?
+/*
+ * Stub for pipe_context::clear_render_target; compiled out by the
+ * enclosing #if 0.  It only logs to stderr and marks the framebuffer and
+ * scissor state dirty -- the ps/color/x/y/w/h arguments are unused and
+ * nothing is cleared.
+ */
+static void
+swr_clear_render_target(struct pipe_context *pipe, struct pipe_surface *ps,
+                        const union pipe_color_union *color,
+                        unsigned x, unsigned y, unsigned w, unsigned h)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   fprintf(stderr, "SWR swr_clear_render_target!\n");
+
+   ctx->dirty |= SWR_NEW_FRAMEBUFFER | SWR_NEW_SCISSOR;
+}
+
+/*
+ * Stub for pipe_context::clear_depth_stencil; compiled out by the
+ * enclosing #if 0.  It only logs to stderr and marks the framebuffer and
+ * scissor state dirty -- no depth or stencil data is written.
+ */
+static void
+swr_clear_depth_stencil(struct pipe_context *pipe, struct pipe_surface *ps,
+                        unsigned buffers, double depth, unsigned stencil,
+                        unsigned x, unsigned y, unsigned w, unsigned h)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   fprintf(stderr, "SWR swr_clear_depth_stencil!\n");
+
+   ctx->dirty |= SWR_NEW_FRAMEBUFFER | SWR_NEW_SCISSOR;
+}
+
+/*
+ * Stub for pipe_context::clear_buffer; compiled out by the enclosing
+ * #if 0.  It validates its arguments with asserts, logs to stderr and
+ * marks the framebuffer and scissor state dirty; the buffer contents are
+ * not modified.  The locals color, dst_fmt, width, height, elements and
+ * swr_buffer are declared but never used.
+ */
+static void
+swr_clear_buffer(struct pipe_context *pipe,
+                 struct pipe_resource *res,
+                 unsigned offset, unsigned size,
+                 const void *data, int data_size)
+{
+   fprintf(stderr, "SWR swr_clear_buffer!\n");
+   struct swr_context *ctx = swr_context(pipe);
+   struct swr_resource *buf = swr_resource(res);
+   union pipe_color_union color;
+   enum pipe_format dst_fmt;
+   unsigned width, height, elements;
+
+   assert(res->target == PIPE_BUFFER);
+   assert(buf);
+   /* The fill pattern must tile the range exactly. */
+   assert(size % data_size == 0);
+
+   SWR_SURFACE_STATE &swr_buffer = buf->swr;
+
+   ctx->dirty |= SWR_NEW_FRAMEBUFFER | SWR_NEW_SCISSOR;
+}
+#endif
+
+
+/**
+ * Install the clear entry points on \p pipe.
+ *
+ * Only pipe_context::clear is hooked up; the clear_render_target,
+ * clear_depth_stencil and clear_buffer assignments are compiled out
+ * together with their stub implementations.
+ */
+void
+swr_clear_init(struct pipe_context *pipe)
+{
+   pipe->clear = swr_clear;
+#if 0 // XXX, these don't get called. how to get these called?  Do we need
+      // them?  Docs?
+   pipe->clear_render_target = swr_clear_render_target;
+   pipe->clear_depth_stencil = swr_clear_depth_stencil;
+   pipe->clear_buffer = swr_clear_buffer;
+#endif
+}
diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp
new file mode 100644 (file)
index 0000000..0e7ebb7
--- /dev/null
@@ -0,0 +1,407 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+extern "C" {
+#include "util/u_transfer.h"
+#include "util/u_surface.h"
+}
+
+#include "swr_context.h"
+#include "swr_memory.h"
+#include "swr_screen.h"
+#include "swr_resource.h"
+#include "swr_scratch.h"
+#include "swr_query.h"
+
+#include "api.h"
+#include "backend.h"
+
+static struct pipe_surface *
+swr_create_surface(struct pipe_context *pipe,
+                   struct pipe_resource *pt,
+                   const struct pipe_surface *surf_tmpl)
+{
+   struct pipe_surface *ps;
+
+   ps = CALLOC_STRUCT(pipe_surface);
+   if (ps) {
+      pipe_reference_init(&ps->reference, 1);
+      pipe_resource_reference(&ps->texture, pt);
+      ps->context = pipe;
+      ps->format = surf_tmpl->format;
+      if (pt->target != PIPE_BUFFER) {
+         assert(surf_tmpl->u.tex.level <= pt->last_level);
+         ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level);
+         ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level);
+         ps->u.tex.level = surf_tmpl->u.tex.level;
+         ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+         ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+         if (ps->u.tex.first_layer != ps->u.tex.last_layer) {
+            debug_printf("creating surface with multiple layers, rendering "
+                         "to first layer only\n");
+         }
+      } else {
+         /* setting width as number of elements should get us correct
+          * renderbuffer width */
+         ps->width = surf_tmpl->u.buf.last_element
+            - surf_tmpl->u.buf.first_element + 1;
+         ps->height = pt->height0;
+         ps->u.buf.first_element = surf_tmpl->u.buf.first_element;
+         ps->u.buf.last_element = surf_tmpl->u.buf.last_element;
+         assert(ps->u.buf.first_element <= ps->u.buf.last_element);
+         assert(ps->u.buf.last_element < ps->width);
+      }
+   }
+   return ps;
+}
+
+static void
+swr_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surf)
+{
+   assert(surf->texture);
+   struct pipe_resource *resource = surf->texture;
+
+   /* If the surface being destroyed is a current render target,
+    * call StoreTiles to resolve the hotTile state then set attachment
+    * to NULL.
+    */
+   if (resource->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL
+                         | PIPE_BIND_DISPLAY_TARGET)) {
+      struct swr_context *ctx = swr_context(pipe);
+      struct swr_resource *spr = swr_resource(resource);
+               swr_draw_context *pDC = &ctx->swrDC;
+               SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
+      for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
+         if (renderTargets[i].pBaseAddress == spr->swr.pBaseAddress) {
+            swr_store_render_target(ctx, i, SWR_TILE_RESOLVED);
+
+            /*
+             * Mesa thinks depth/stencil are fused, so we'll never get an
+             * explicit resource for stencil.  So, if checking depth, then
+             * also check for stencil.
+             */
+            if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) {
+               swr_store_render_target(
+                  ctx, SWR_ATTACHMENT_STENCIL, SWR_TILE_RESOLVED);
+            }
+
+            SwrWaitForIdle(ctx->swrContext);
+            break;
+         }
+   }
+
+   pipe_resource_reference(&surf->texture, NULL);
+   FREE(surf);
+}
+
+
+static void *
+swr_transfer_map(struct pipe_context *pipe,
+                 struct pipe_resource *resource,
+                 unsigned level,
+                 unsigned usage,
+                 const struct pipe_box *box,
+                 struct pipe_transfer **transfer)
+{
+   struct swr_resource *spr = swr_resource(resource);
+   struct pipe_transfer *pt;
+   enum pipe_format format = resource->format;
+
+   assert(resource);
+   assert(level <= resource->last_level);
+
+   /*
+    * If mapping any attached rendertarget, store tiles and wait for idle
+    * before giving CPU access to the surface.
+    * (set postStoreTileState to SWR_TILE_INVALID so tiles are reloaded)
+    */
+   if (resource->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL
+                         | PIPE_BIND_DISPLAY_TARGET)) {
+      struct swr_context *ctx = swr_context(pipe);
+      swr_draw_context *pDC = &ctx->swrDC;
+      SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
+      for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
+         if (renderTargets[i].pBaseAddress == spr->swr.pBaseAddress) {
+            swr_store_render_target(ctx, i, SWR_TILE_INVALID);
+            /*
+             * Mesa thinks depth/stencil are fused, so we'll never get an
+             * explicit map for stencil.  So, if mapping depth, then also
+             * store tile for stencil.
+             */
+            if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH))
+               swr_store_render_target(
+                  ctx, SWR_ATTACHMENT_STENCIL, SWR_TILE_INVALID);
+            SwrWaitForIdle(ctx->swrContext);
+            break;
+         }
+   }
+
+   pt = CALLOC_STRUCT(pipe_transfer);
+   if (!pt)
+      return NULL;
+   pipe_resource_reference(&pt->resource, resource);
+   pt->level = level;
+   pt->box = *box;
+   pt->stride = spr->row_stride[level];
+   pt->layer_stride = spr->img_stride[level];
+
+   /* if we're mapping the depth/stencil, copy in stencil */
+   if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT
+       && spr->has_stencil) {
+      for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) {
+         spr->swr.pBaseAddress[4 * i + 3] = spr->secondary.pBaseAddress[i];
+      }
+   } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
+              && spr->has_stencil) {
+      for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) {
+         spr->swr.pBaseAddress[8 * i + 4] = spr->secondary.pBaseAddress[i];
+      }
+   }
+
+   unsigned offset = box->z * pt->layer_stride + box->y * pt->stride
+      + box->x * util_format_get_blocksize(format);
+
+   *transfer = pt;
+
+   return spr->swr.pBaseAddress + offset + spr->mip_offsets[level];
+}
+
+static void
+swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer)
+{
+   assert(transfer->resource);
+
+   /*
+    * XXX TODO: use fences and come up with a real resource manager.
+    *
+    * If this resource has been mapped/unmapped, it's probably in use.  Tag it
+    *with this context so
+    * we'll know to check dependencies when it's deleted.
+    */
+   struct swr_resource *res = swr_resource(transfer->resource);
+   res->bound_to_context = (void *)pipe;
+
+   /* if we're mapping the depth/stencil, copy out stencil */
+   if (res->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT
+       && res->has_stencil) {
+      for (unsigned i = 0; i < res->alignedWidth * res->alignedHeight; i++) {
+         res->secondary.pBaseAddress[i] = res->swr.pBaseAddress[4 * i + 3];
+      }
+   } else if (res->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
+              && res->has_stencil) {
+      for (unsigned i = 0; i < res->alignedWidth * res->alignedHeight; i++) {
+         res->secondary.pBaseAddress[i] = res->swr.pBaseAddress[8 * i + 4];
+      }
+   }
+
+   pipe_resource_reference(&transfer->resource, NULL);
+   FREE(transfer);
+}
+
+
+static void
+swr_resource_copy(struct pipe_context *pipe,
+                  struct pipe_resource *dst,
+                  unsigned dst_level,
+                  unsigned dstx,
+                  unsigned dsty,
+                  unsigned dstz,
+                  struct pipe_resource *src,
+                  unsigned src_level,
+                  const struct pipe_box *src_box)
+{
+   if ((dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER)
+       || (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER)) {
+      util_resource_copy_region(
+         pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box);
+      return;
+   }
+
+   debug_printf("unhandled swr_resource_copy\n");
+}
+
+
+/**
+ * pipe_context::blit implementation.
+ *
+ * Works on a local copy of \p blit_info so the mask can be adjusted.
+ * In order:
+ *  - returns early if render_condition_enable is set and
+ *    swr_check_render_cond() returns false;
+ *  - refuses multisample-to-single-sample resolves of non-depth/stencil,
+ *    non-pure-integer formats (debug message only);
+ *  - tries util_try_blit_via_copy_region() and stops if that succeeds;
+ *  - drops PIPE_MASK_S from the mask, since stencil blits are skipped;
+ *  - returns if util_blitter_is_blit_supported() rejects the blit;
+ *  - otherwise saves the context state with the util_blitter_save_*
+ *    helpers and performs the blit with util_blitter_blit().
+ */
+static void
+swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   struct pipe_blit_info info = *blit_info;
+
+   if (blit_info->render_condition_enable && !swr_check_render_cond(pipe))
+      return;
+
+   if (info.src.resource->nr_samples > 1 && info.dst.resource->nr_samples <= 1
+       && !util_format_is_depth_or_stencil(info.src.resource->format)
+       && !util_format_is_pure_integer(info.src.resource->format)) {
+      debug_printf("swr: color resolve unimplemented\n");
+      return;
+   }
+
+   if (util_try_blit_via_copy_region(pipe, &info)) {
+      return; /* done */
+   }
+
+   if (info.mask & PIPE_MASK_S) {
+      debug_printf("swr: cannot blit stencil, skipping\n");
+      info.mask &= ~PIPE_MASK_S;
+   }
+
+   if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
+      debug_printf("swr: blit unsupported %s -> %s\n",
+                   util_format_short_name(info.src.resource->format),
+                   util_format_short_name(info.dst.resource->format));
+      return;
+   }
+
+   /* XXX turn off occlusion and streamout queries */
+
+   /* Hand the currently bound state to the blitter before it draws. */
+   util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer);
+   util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems);
+   util_blitter_save_vertex_shader(ctx->blitter, (void *)ctx->vs);
+   /*util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);*/
+   util_blitter_save_so_targets(
+      ctx->blitter,
+      ctx->num_so_targets,
+      (struct pipe_stream_output_target **)ctx->so_targets);
+   util_blitter_save_rasterizer(ctx->blitter, (void *)ctx->rasterizer);
+   util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
+   util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
+   util_blitter_save_fragment_shader(ctx->blitter, ctx->fs);
+   util_blitter_save_blend(ctx->blitter, (void *)ctx->blend);
+   util_blitter_save_depth_stencil_alpha(ctx->blitter,
+                                         (void *)ctx->depth_stencil);
+   util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
+   util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
+   util_blitter_save_framebuffer(ctx->blitter, &ctx->framebuffer);
+   util_blitter_save_fragment_sampler_states(
+      ctx->blitter,
+      ctx->num_samplers[PIPE_SHADER_FRAGMENT],
+      (void **)ctx->samplers[PIPE_SHADER_FRAGMENT]);
+   util_blitter_save_fragment_sampler_views(
+      ctx->blitter,
+      ctx->num_sampler_views[PIPE_SHADER_FRAGMENT],
+      ctx->sampler_views[PIPE_SHADER_FRAGMENT]);
+   util_blitter_save_render_condition(ctx->blitter,
+                                      ctx->render_cond_query,
+                                      ctx->render_cond_cond,
+                                      ctx->render_cond_mode);
+
+   util_blitter_blit(ctx->blitter, &info);
+}
+
+
+/**
+ * pipe_context::destroy implementation.
+ *
+ * Destroys the blitter and the SWR core context when they exist, deletes
+ * the blend JIT cache, releases the scratch buffers and frees the context
+ * itself.  swr_create_context() also calls this on its failure path, so
+ * it can run on a partially initialized context.
+ * NOTE(review): on that path ctx->scratch has not been set up yet --
+ * confirm swr_destroy_scratch_buffers() tolerates that.
+ */
+static void
+swr_destroy(struct pipe_context *pipe)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   if (ctx->blitter)
+      util_blitter_destroy(ctx->blitter);
+
+   if (ctx->swrContext)
+      SwrDestroyContext(ctx->swrContext);
+
+   /* Allocated with new in swr_create_context(), hence delete not FREE. */
+   delete ctx->blendJIT;
+
+   swr_destroy_scratch_buffers(ctx);
+
+   FREE(ctx);
+}
+
+
+/**
+ * pipe_context::render_condition implementation.
+ *
+ * Only records the query, the condition value and the mode on the
+ * context; swr_blit() later passes these stored values to the blitter.
+ */
+static void
+swr_render_condition(struct pipe_context *pipe,
+                     struct pipe_query *query,
+                     boolean condition,
+                     uint mode)
+{
+   struct swr_context *swr = swr_context(pipe);
+
+   swr->render_cond_query = query;
+   swr->render_cond_cond = condition;
+   swr->render_cond_mode = mode;
+}
+
+
+struct pipe_context *
+swr_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
+{
+   struct swr_context *ctx = CALLOC_STRUCT(swr_context);
+   ctx->blendJIT =
+      new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>;
+
+   SWR_CREATECONTEXT_INFO createInfo;
+   createInfo.driver = GL;
+   createInfo.privateStateSize = sizeof(swr_draw_context);
+   createInfo.maxSubContexts = 0;
+   createInfo.pfnLoadTile = swr_LoadHotTile;
+   createInfo.pfnStoreTile = swr_StoreHotTile;
+   createInfo.pfnClearTile = swr_StoreHotTileClear;
+   ctx->swrContext = SwrCreateContext(&createInfo);
+
+   /* Init Load/Store/ClearTiles Tables */
+   swr_InitMemoryModule();
+
+   InitBackendFuncTables();
+
+   if (ctx->swrContext == NULL)
+      goto fail;
+
+   ctx->pipe.screen = screen;
+   ctx->pipe.destroy = swr_destroy;
+   ctx->pipe.priv = priv;
+   ctx->pipe.create_surface = swr_create_surface;
+   ctx->pipe.surface_destroy = swr_surface_destroy;
+   ctx->pipe.transfer_map = swr_transfer_map;
+   ctx->pipe.transfer_unmap = swr_transfer_unmap;
+
+   ctx->pipe.transfer_flush_region = u_default_transfer_flush_region;
+   ctx->pipe.transfer_inline_write = u_default_transfer_inline_write;
+
+   ctx->pipe.resource_copy_region = swr_resource_copy;
+   ctx->pipe.render_condition = swr_render_condition;
+
+   swr_state_init(&ctx->pipe);
+   swr_clear_init(&ctx->pipe);
+   swr_draw_init(&ctx->pipe);
+   swr_query_init(&ctx->pipe);
+
+   ctx->pipe.blit = swr_blit;
+   ctx->blitter = util_blitter_create(&ctx->pipe);
+   if (!ctx->blitter) {
+      goto fail;
+   }
+
+   swr_init_scratch_buffers(ctx);
+
+   return &ctx->pipe;
+
+fail:
+   /* Should really validate the init steps and fail gracefully */
+   swr_destroy(&ctx->pipe);
+   return NULL;
+}
diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h
new file mode 100644 (file)
index 0000000..73a8e8d
--- /dev/null
@@ -0,0 +1,182 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_CONTEXT_H
+#define SWR_CONTEXT_H
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_blitter.h"
+#include "jit_api.h"
+#include "swr_state.h"
+#include <unordered_map>
+
+/*
+ * Dirty-state flags, one bit each, OR'ed into swr_context::dirty.
+ * SWR_NEW_ALL is the union of bits 0..15, i.e. every flag defined here;
+ * keep it in sync when adding a flag.
+ */
+#define SWR_NEW_BLEND (1 << 0)
+#define SWR_NEW_RASTERIZER (1 << 1)
+#define SWR_NEW_DEPTH_STENCIL_ALPHA (1 << 2)
+#define SWR_NEW_SAMPLER (1 << 3)
+#define SWR_NEW_SAMPLER_VIEW (1 << 4)
+#define SWR_NEW_VS (1 << 5)
+#define SWR_NEW_FS (1 << 6)
+#define SWR_NEW_VSCONSTANTS (1 << 7)
+#define SWR_NEW_FSCONSTANTS (1 << 8)
+#define SWR_NEW_VERTEX (1 << 9)
+#define SWR_NEW_STIPPLE (1 << 10)
+#define SWR_NEW_SCISSOR (1 << 11)
+#define SWR_NEW_VIEWPORT (1 << 12)
+#define SWR_NEW_FRAMEBUFFER (1 << 13)
+#define SWR_NEW_CLIP (1 << 14)
+#define SWR_NEW_SO (1 << 15)
+#define SWR_NEW_ALL 0x0000ffff
+
+namespace std
+{
+/**
+ * std::hash specialization that lets BLEND_COMPILE_STATE be used as the
+ * key of the blend JIT cache (swr_context::blendJIT).  The hash is the
+ * CRC32 of the object's raw bytes.
+ * NOTE(review): raw-byte hashing also covers any padding inside
+ * BLEND_COMPILE_STATE -- confirm keys are fully zeroed before use.
+ */
+template <> struct hash<BLEND_COMPILE_STATE> {
+   std::size_t operator()(const BLEND_COMPILE_STATE &k) const
+   {
+      return util_hash_crc32(&k, sizeof(k));
+   }
+};
+} // namespace std
+
+/*
+ * Per-texture description stored in swr_draw_context.
+ * The member order and types are mirrored by Gen_swr_jit_texture() and
+ * the swr_jit_texture_* indices in swr_context_llvm.h; change both
+ * together.
+ */
+struct swr_jit_texture {
+   uint32_t width; // same as number of elements
+   uint32_t height;
+   uint32_t depth; // doubles as array size
+   uint32_t first_level;
+   uint32_t last_level;
+   const void *base_ptr;
+   // per-mip-level tables, indexed by level
+   uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS];
+   uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS];
+   uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+/*
+ * Per-sampler description stored in swr_draw_context.
+ * The member order and types are mirrored by Gen_swr_jit_sampler() and
+ * the swr_jit_sampler_* indices in swr_context_llvm.h; change both
+ * together.
+ */
+struct swr_jit_sampler {
+   float min_lod;
+   float max_lod;
+   float lod_bias;
+   float border_color[4];
+};
+
+/*
+ * Driver-private per-draw state.  The driver keeps a copy in
+ * swr_context::swrDC and swr_update_draw_context() copies it into the
+ * SWR core's private context state.
+ * The member order and types are mirrored by Gen_swr_draw_context() and
+ * the swr_draw_context_* indices in swr_context_llvm.h; change both
+ * together.
+ */
+struct swr_draw_context {
+   // constant buffers and their sizes, per shader stage
+   const float *constantVS[PIPE_MAX_CONSTANT_BUFFERS];
+   unsigned num_constantsVS[PIPE_MAX_CONSTANT_BUFFERS];
+   const float *constantFS[PIPE_MAX_CONSTANT_BUFFERS];
+   unsigned num_constantsFS[PIPE_MAX_CONSTANT_BUFFERS];
+
+   // textures and samplers, per shader stage
+   swr_jit_texture texturesVS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   swr_jit_sampler samplersVS[PIPE_MAX_SAMPLERS];
+   swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS];
+
+   // currently attached render targets, indexed by SWR attachment
+   SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];
+};
+
+/*
+ * SWR driver context.  The embedded pipe_context must remain the first
+ * member: swr_context() converts a pipe_context pointer to a swr_context
+ * pointer with a plain cast.
+ */
+struct swr_context {
+   struct pipe_context pipe; /**< base class */
+
+   HANDLE swrContext; /**< SWR core context from SwrCreateContext() */
+
+   /** Constant state objects */
+   struct swr_blend_state *blend;
+   struct pipe_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+   struct pipe_depth_stencil_alpha_state *depth_stencil;
+   struct pipe_rasterizer_state *rasterizer;
+
+   struct swr_vertex_shader *vs;
+   struct swr_fragment_shader *fs;
+   struct swr_vertex_element_state *velems;
+
+   /** Other rendering state */
+   struct pipe_blend_color blend_color;
+   struct pipe_stencil_ref stencil_ref;
+   struct pipe_clip_state clip;
+   struct pipe_constant_buffer
+      constants[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
+   struct pipe_framebuffer_state framebuffer;
+   struct pipe_poly_stipple poly_stipple;
+   struct pipe_scissor_state scissor;
+   struct pipe_sampler_view *
+      sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
+
+   struct pipe_viewport_state viewport;
+   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+   struct pipe_index_buffer index_buffer;
+
+   struct blitter_context *blitter; /**< created in swr_create_context() */
+
+   /** Conditional query object and mode */
+   struct pipe_query *render_cond_query;
+   uint render_cond_mode;
+   boolean render_cond_cond;
+   unsigned active_queries;
+
+   unsigned num_vertex_buffers;
+   unsigned num_samplers[PIPE_SHADER_TYPES];
+   unsigned num_sampler_views[PIPE_SHADER_TYPES];
+
+   unsigned sample_mask;
+
+   // streamout
+   pipe_stream_output_target *so_targets[MAX_SO_STREAMS];
+   uint32_t num_so_targets;
+
+   /* Temp storage for user_buffer constants */
+   struct swr_scratch_buffers *scratch;
+
+   // blend jit functions
+   // (heap-allocated with new in swr_create_context(), deleted in
+   // swr_destroy())
+   std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC> *blendJIT;
+
+   /* Derived SWR API DrawState */
+   struct swr_derived_state derived;
+
+   /* SWR private state - draw context */
+   struct swr_draw_context swrDC;
+
+   unsigned dirty; /**< Mask of SWR_NEW_x flags */
+};
+
+/*
+ * Convert a pipe_context pointer to the swr_context that contains it.
+ * This is a plain cast, which relies on `pipe` being the first member of
+ * struct swr_context.
+ */
+static INLINE struct swr_context *
+swr_context(struct pipe_context *pipe)
+{
+   return (struct swr_context *)pipe;
+}
+
+/*
+ * Copy the driver's draw context (ctx->swrDC) into the private state
+ * block returned by SwrGetPrivateContextState() for this SWR context.
+ */
+static INLINE void
+swr_update_draw_context(struct swr_context *ctx)
+{
+   swr_draw_context *core_state =
+      (swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext);
+
+   memcpy(core_state, &ctx->swrDC, sizeof(ctx->swrDC));
+}
+
+/* Create a context for the given screen; returns NULL on failure. */
+struct pipe_context *swr_create_context(struct pipe_screen *, void *priv, unsigned flags);
+
+/* Per-module hooks called from swr_create_context() to fill in the
+ * pipe_context function table. */
+void swr_state_init(struct pipe_context *pipe);
+
+void swr_clear_init(struct pipe_context *pipe);
+
+void swr_draw_init(struct pipe_context *pipe);
+
+void swr_finish(struct pipe_context *pipe);
+#endif
diff --git a/src/gallium/drivers/swr/swr_context_llvm.h b/src/gallium/drivers/swr/swr_context_llvm.h
new file mode 100644 (file)
index 0000000..58da813
--- /dev/null
@@ -0,0 +1,124 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#pragma once
+
+//////////////////////////////////////////////////////////////////////////
+/// Generate LLVM type information for swr_jit_texture
+///
+/// Builds a non-packed LLVM struct type with one member per field of
+/// struct swr_jit_texture, in declaration order.  The push_back order
+/// below defines the member indices named by the swr_jit_texture_*
+/// constants, so it must stay in step with both.
+INLINE static StructType *
+Gen_swr_jit_texture(JitManager *pShG)
+{
+   LLVMContext &ctx = pShG->mContext;
+   std::vector<Type *> members;
+
+   members.push_back(Type::getInt32Ty(ctx)); // width
+   members.push_back(Type::getInt32Ty(ctx)); // height
+   members.push_back(Type::getInt32Ty(ctx)); // depth
+   members.push_back(Type::getInt32Ty(ctx)); // first_level
+   members.push_back(Type::getInt32Ty(ctx)); // last_level
+   members.push_back(PointerType::get(Type::getInt8Ty(ctx), 0)); // base_ptr
+   members.push_back(ArrayType::get(Type::getInt32Ty(ctx),
+                                    PIPE_MAX_TEXTURE_LEVELS)); // row_stride
+   members.push_back(ArrayType::get(Type::getInt32Ty(ctx),
+                                    PIPE_MAX_TEXTURE_LEVELS)); // img_stride
+   members.push_back(ArrayType::get(Type::getInt32Ty(ctx),
+                                    PIPE_MAX_TEXTURE_LEVELS)); // mip_offsets
+
+   // final argument: isPacked = false
+   return StructType::get(ctx, members, false);
+}
+
+// Member indices into the struct type built by Gen_swr_jit_texture();
+// each value is the position of the matching push_back() there.
+static const UINT swr_jit_texture_width = 0;
+static const UINT swr_jit_texture_height = 1;
+static const UINT swr_jit_texture_depth = 2;
+static const UINT swr_jit_texture_first_level = 3;
+static const UINT swr_jit_texture_last_level = 4;
+static const UINT swr_jit_texture_base_ptr = 5;
+static const UINT swr_jit_texture_row_stride = 6;
+static const UINT swr_jit_texture_img_stride = 7;
+static const UINT swr_jit_texture_mip_offsets = 8;
+
+//////////////////////////////////////////////////////////////////////////
+/// Generate LLVM type information for swr_jit_sampler
+///
+/// Builds a non-packed LLVM struct type with one member per field of
+/// struct swr_jit_sampler, in declaration order.  The push_back order
+/// below defines the member indices named by the swr_jit_sampler_*
+/// constants, so it must stay in step with both.
+INLINE static StructType *
+Gen_swr_jit_sampler(JitManager *pShG)
+{
+   LLVMContext &ctx = pShG->mContext;
+   std::vector<Type *> members;
+
+   members.push_back(Type::getFloatTy(ctx)); // min_lod
+   members.push_back(Type::getFloatTy(ctx)); // max_lod
+   members.push_back(Type::getFloatTy(ctx)); // lod_bias
+   members.push_back(
+      ArrayType::get(Type::getFloatTy(ctx), 4)); // border_color
+
+   // final argument: isPacked = false
+   return StructType::get(ctx, members, false);
+}
+
+// Member indices into the struct type built by Gen_swr_jit_sampler();
+// each value is the position of the matching push_back() there.
+static const UINT swr_jit_sampler_min_lod = 0;
+static const UINT swr_jit_sampler_max_lod = 1;
+static const UINT swr_jit_sampler_lod_bias = 2;
+static const UINT swr_jit_sampler_border_color = 3;
+
+//////////////////////////////////////////////////////////////////////////
+/// Generate LLVM type information for swr_draw_context
+///
+/// Builds a non-packed LLVM struct type with one member per field of
+/// struct swr_draw_context, in declaration order, reusing
+/// Gen_swr_jit_texture() / Gen_swr_jit_sampler() for the element types of
+/// the texture and sampler arrays.  The push_back order below defines the
+/// member indices named by the swr_draw_context_* constants, so it must
+/// stay in step with both.
+INLINE static StructType *
+Gen_swr_draw_context(JitManager *pShG)
+{
+   LLVMContext &ctx = pShG->mContext;
+   std::vector<Type *> members;
+
+   members.push_back(
+      ArrayType::get(PointerType::get(Type::getFloatTy(ctx), 0),
+                     PIPE_MAX_CONSTANT_BUFFERS)); // constantVS
+   members.push_back(ArrayType::get(
+      Type::getInt32Ty(ctx), PIPE_MAX_CONSTANT_BUFFERS)); // num_constantsVS
+   members.push_back(
+      ArrayType::get(PointerType::get(Type::getFloatTy(ctx), 0),
+                     PIPE_MAX_CONSTANT_BUFFERS)); // constantFS
+   members.push_back(ArrayType::get(
+      Type::getInt32Ty(ctx), PIPE_MAX_CONSTANT_BUFFERS)); // num_constantsFS
+   members.push_back(
+      ArrayType::get(Gen_swr_jit_texture(pShG),
+                     PIPE_MAX_SHADER_SAMPLER_VIEWS)); // texturesVS
+   members.push_back(ArrayType::get(Gen_swr_jit_sampler(pShG),
+                                    PIPE_MAX_SAMPLERS)); // samplersVS
+   members.push_back(
+      ArrayType::get(Gen_swr_jit_texture(pShG),
+                     PIPE_MAX_SHADER_SAMPLER_VIEWS)); // texturesFS
+   members.push_back(ArrayType::get(Gen_swr_jit_sampler(pShG),
+                                    PIPE_MAX_SAMPLERS)); // samplersFS
+   members.push_back(ArrayType::get(Gen_SWR_SURFACE_STATE(pShG),
+                                    SWR_NUM_ATTACHMENTS)); // renderTargets
+
+   // final argument: isPacked = false
+   return StructType::get(ctx, members, false);
+}
+
+// Member indices into the struct type built by Gen_swr_draw_context();
+// each value is the position of the matching push_back() there.
+static const UINT swr_draw_context_constantVS = 0;
+static const UINT swr_draw_context_num_constantsVS = 1;
+static const UINT swr_draw_context_constantFS = 2;
+static const UINT swr_draw_context_num_constantsFS = 3;
+static const UINT swr_draw_context_texturesVS = 4;
+static const UINT swr_draw_context_samplersVS = 5;
+static const UINT swr_draw_context_texturesFS = 6;
+static const UINT swr_draw_context_samplersFS = 7;
+static const UINT swr_draw_context_renderTargets = 8;
diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp
new file mode 100644 (file)
index 0000000..a775bd2
--- /dev/null
@@ -0,0 +1,271 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "swr_screen.h"
+#include "swr_context.h"
+#include "swr_resource.h"
+#include "swr_fence.h"
+#include "swr_query.h"
+#include "jit_api.h"
+
+#include "util/u_draw.h"
+#include "util/u_prim.h"
+
+/*
+ * Convert mesa PIPE_PRIM_X to SWR enum PRIMITIVE_TOPOLOGY
+ */
+static INLINE enum PRIMITIVE_TOPOLOGY
+swr_convert_prim_topology(const unsigned mode)
+{
+   switch (mode) {
+   case PIPE_PRIM_POINTS:
+      return TOP_POINT_LIST;
+   case PIPE_PRIM_LINES:
+      return TOP_LINE_LIST;
+   case PIPE_PRIM_LINE_LOOP:
+      return TOP_LINE_LOOP;
+   case PIPE_PRIM_LINE_STRIP:
+      return TOP_LINE_STRIP;
+   case PIPE_PRIM_TRIANGLES:
+      return TOP_TRIANGLE_LIST;
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      return TOP_TRIANGLE_STRIP;
+   case PIPE_PRIM_TRIANGLE_FAN:
+      return TOP_TRIANGLE_FAN;
+   case PIPE_PRIM_QUADS:
+      return TOP_QUAD_LIST;
+   case PIPE_PRIM_QUAD_STRIP:
+      return TOP_QUAD_STRIP;
+   case PIPE_PRIM_POLYGON:
+      return TOP_TRIANGLE_FAN; /* XXX TOP_POLYGON; */
+   case PIPE_PRIM_LINES_ADJACENCY:
+      return TOP_LINE_LIST_ADJ;
+   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+      return TOP_LISTSTRIP_ADJ;
+   case PIPE_PRIM_TRIANGLES_ADJACENCY:
+      return TOP_TRI_LIST_ADJ;
+   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+      return TOP_TRI_STRIP_ADJ;
+   default:
+      assert(0 && "Unknown topology");
+      return TOP_UNKNOWN;
+   }
+};
+
+
+/*
+ * Draw vertex arrays, with optional indexing, optional instancing.
+ *
+ * Installed as pipe_context::draw_vbo by swr_draw_init().  The draw is
+ * dropped when swr_check_render_cond() fails, and indirect draws are handed
+ * to util_draw_indirect().  Otherwise derived state is refreshed, the
+ * stream-output and fetch shaders are JIT-compiled on demand, and the draw
+ * is issued through SwrDrawIndexedInstanced or SwrDrawInstanced.
+ */
+static void
+swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   if (!swr_check_render_cond(pipe))
+      return;
+
+   if (info->indirect) {
+      util_draw_indirect(pipe, info);
+      return;
+   }
+
+   /* Update derived state, pass draw info to update function */
+   if (ctx->dirty)
+      swr_update_derived(ctx, info);
+
+   swr_update_draw_context(ctx);
+
+   /* Stream output: the SO function is compiled once per primitive mode and
+    * cached in ctx->vs->soFunc[], indexed by info->mode. */
+   if (ctx->vs->pipe.stream_output.num_outputs) {
+      if (!ctx->vs->soFunc[info->mode]) {
+         STREAMOUT_COMPILE_STATE state = {0};
+         struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;
+
+         state.numVertsPerPrim = u_vertices_per_prim(info->mode);
+
+         /* Running write offset per output buffer, used to detect gaps
+          * between consecutive outputs. */
+         uint32_t offsets[MAX_SO_STREAMS] = {0};
+         uint32_t num = 0;
+
+         for (uint32_t i = 0; i < so->num_outputs; i++) {
+            assert(so->output[i].stream == 0); // @todo
+            uint32_t output_buffer = so->output[i].output_buffer;
+            if (so->output[i].dst_offset != offsets[output_buffer]) {
+               // hole - need to fill
+               /* Emit a "hole" decl whose mask has one low bit set per unit
+                * of the gap between the expected and the requested offset. */
+               state.stream.decl[num].bufferIndex = output_buffer;
+               state.stream.decl[num].hole = true;
+               state.stream.decl[num].componentMask =
+                  (1 << (so->output[i].dst_offset - offsets[output_buffer]))
+                  - 1;
+               num++;
+               offsets[output_buffer] = so->output[i].dst_offset;
+            }
+
+            /* NOTE(review): the "- 1" presumably accounts for a leading
+             * slot that is not streamed out — confirm against the SWR
+             * attribute layout. */
+            state.stream.decl[num].bufferIndex = output_buffer;
+            state.stream.decl[num].attribSlot = so->output[i].register_index - 1;
+            state.stream.decl[num].componentMask =
+               ((1 << so->output[i].num_components) - 1)
+               << so->output[i].start_component;
+            state.stream.decl[num].hole = false;
+            num++;
+
+            offsets[output_buffer] += so->output[i].num_components;
+         }
+
+         state.stream.numDecls = num;
+
+         HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr;
+         ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state);
+         debug_printf("so shader    %p\n", ctx->vs->soFunc[info->mode]);
+         assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL");
+      }
+
+      SwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0);
+   }
+
+   /* The fetch shader bakes in the cut index and its enable bit, so it is
+    * (re)compiled when none exists yet or when either value differs from
+    * this draw's settings. */
+   struct swr_vertex_element_state *velems = ctx->velems;
+   if (!velems->fsFunc
+       || (velems->fsState.cutIndex != info->restart_index)
+       || (velems->fsState.bEnableCutIndex != info->primitive_restart)) {
+
+      velems->fsState.cutIndex = info->restart_index;
+      velems->fsState.bEnableCutIndex = info->primitive_restart;
+
+      /* Create Fetch Shader */
+      HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
+      velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState);
+
+      debug_printf("fetch shader %p\n", velems->fsFunc);
+      assert(velems->fsFunc && "Error: FetchShader = NULL");
+   }
+
+   SwrSetFetchFunc(ctx->swrContext, velems->fsFunc);
+
+   /* Issue the draw; only the indexed path passes info->index_bias. */
+   if (info->indexed)
+      SwrDrawIndexedInstanced(ctx->swrContext,
+                              swr_convert_prim_topology(info->mode),
+                              info->count,
+                              info->instance_count,
+                              info->start,
+                              info->index_bias,
+                              info->start_instance);
+   else
+      SwrDrawInstanced(ctx->swrContext,
+                       swr_convert_prim_topology(info->mode),
+                       info->count,
+                       info->instance_count,
+                       info->start,
+                       info->start_instance);
+}
+
+
+static void
+swr_flush(struct pipe_context *pipe,
+          struct pipe_fence_handle **fence,
+          unsigned flags)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   struct swr_screen *screen = swr_screen(pipe->screen);
+
+   /* If the current renderTarget is the display surface, store tiles back to
+    * the surface, in
+    * preparation for present (swr_flush_frontbuffer)
+    */
+   struct pipe_surface *cb = ctx->framebuffer.cbufs[0];
+   if (cb && swr_resource(cb->texture)->display_target) {
+      swr_store_render_target(ctx, SWR_ATTACHMENT_COLOR0, SWR_TILE_RESOLVED);
+      swr_resource(cb->texture)->bound_to_context = (void*)pipe;
+   }
+
+   // SwrStoreTiles is asynchronous, always submit the "flush" fence.
+   // flush_frontbuffer needs it.
+   swr_fence_submit(ctx, screen->flush_fence);
+
+   if (fence)
+      swr_fence_reference(pipe->screen, fence, screen->flush_fence);
+}
+
+/*
+ * Flush the context and block until the fence returned by the flush has
+ * completed.
+ */
+void
+swr_finish(struct pipe_context *pipe)
+{
+   struct pipe_screen *p_screen = &swr_screen(pipe->screen)->base;
+   struct pipe_fence_handle *flush_done = NULL;
+
+   swr_flush(pipe, &flush_done, 0);
+   swr_fence_finish(p_screen, flush_done, 0);
+
+   /* Drop the reference handed out by swr_flush. */
+   swr_fence_reference(p_screen, &flush_done, NULL);
+}
+
+
+/*
+ * Store SWR HotTiles back to RenderTarget surface.
+ */
+void
+swr_store_render_target(struct swr_context *ctx,
+                        uint32_t attachment,
+                        enum SWR_TILE_STATE post_tile_state)
+{
+   struct swr_draw_context *pDC = &ctx->swrDC;
+   struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment];
+
+   /* Only proceed if there's a valid surface to store to */
+   if (renderTarget->pBaseAddress) {
+      /* Set viewport to full renderTarget width/height and disable scissor
+       * before StoreTiles */
+      boolean change_viewport =
+         (ctx->derived.vp.x != 0.0f || ctx->derived.vp.y != 0.0f
+          || ctx->derived.vp.width != renderTarget->width
+          || ctx->derived.vp.height != renderTarget->height);
+      if (change_viewport) {
+         SWR_VIEWPORT vp = {0};
+         vp.width = renderTarget->width;
+         vp.height = renderTarget->height;
+         SwrSetViewports(ctx->swrContext, 1, &vp, NULL);
+      }
+
+      boolean scissor_enable = ctx->derived.rastState.scissorEnable;
+      if (scissor_enable) {
+         ctx->derived.rastState.scissorEnable = FALSE;
+         SwrSetRastState(ctx->swrContext, &ctx->derived.rastState);
+      }
+
+      swr_update_draw_context(ctx);
+      SwrStoreTiles(ctx->swrContext,
+                    (enum SWR_RENDERTARGET_ATTACHMENT)attachment,
+                    post_tile_state);
+
+      /* Restore viewport and scissor enable */
+      if (change_viewport)
+         SwrSetViewports(ctx->swrContext, 1, &ctx->derived.vp, &ctx->derived.vpm);
+      if (scissor_enable) {
+         ctx->derived.rastState.scissorEnable = scissor_enable;
+         SwrSetRastState(ctx->swrContext, &ctx->derived.rastState);
+      }
+   }
+}
+
+
+/*
+ * Hook this file's draw and flush entry points into the pipe context.
+ */
+void
+swr_draw_init(struct pipe_context *pipe)
+{
+   pipe->flush = swr_flush;
+   pipe->draw_vbo = swr_draw_vbo;
+}
diff --git a/src/gallium/drivers/swr/swr_fence.cpp b/src/gallium/drivers/swr/swr_fence.cpp
new file mode 100644 (file)
index 0000000..f97ea22
--- /dev/null
@@ -0,0 +1,143 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "pipe/p_screen.h"
+#include "util/u_memory.h"
+#include "os/os_time.h"
+
+#include "swr_context.h"
+#include "swr_screen.h"
+#include "swr_fence.h"
+
+#if defined(PIPE_CC_MSVC) // portable thread yield
+   #define sched_yield SwitchToThread  
+#endif
+/*
+ * Fence callback, called by the back-end thread once all rendering up to
+ * the matching SwrSync call has completed.  userData carries the swr_fence
+ * pointer; userData2 and userData3 are unused.
+ */
+static void
+swr_sync_cb(UINT64 userData, UINT64 userData2, UINT64 userData3)
+{
+   struct swr_fence *signalled = (struct swr_fence *)userData;
+
+   /* Catch read up with write; swr_is_fence_done() compares the two. */
+   signalled->read = signalled->write;
+}
+
+/*
+ * Submit an existing fence: mark it pending and queue a sync whose
+ * callback (swr_sync_cb) marks it complete again.
+ */
+void
+swr_fence_submit(struct swr_context *ctx, struct pipe_fence_handle *fh)
+{
+   struct swr_fence *pending = swr_fence(fh);
+
+   /* Advance write before queueing the sync so the fence reads as not done
+    * until the callback copies write into read. */
+   pending->write += 1;
+   SwrSync(ctx->swrContext, swr_sync_cb, (UINT64)pending, 0, 0);
+}
+
+/*
+ * Create a new fence object.
+ *
+ * Returns a fence with a reference count of one, or NULL when allocation
+ * fails.  The id comes from a function-local counter and is informational
+ * only; the counter is not protected against concurrent callers.
+ */
+struct pipe_fence_handle *
+swr_fence_create()
+{
+   static int fence_id = 0;
+   struct swr_fence *fence = CALLOC_STRUCT(swr_fence);
+   if (!fence)
+      return NULL;
+
+   /* CALLOC_STRUCT hands back zeroed memory, so read and write already
+    * start out equal (fence done) without an extra memset. */
+   pipe_reference_init(&fence->reference, 1);
+   fence->id = fence_id++;
+
+   return (struct pipe_fence_handle *)fence;
+}
+
+/**
+ * Destroy a fence.  Called when refcount hits zero.
+ *
+ * Only releases the memory; it does not wait for a sync callback that may
+ * still be queued for this fence.
+ */
+static void
+swr_fence_destroy(struct swr_fence *fence)
+{
+   FREE(fence);
+}
+
+/**
+ * Set ptr = fence, with reference counting
+ */
+void
+swr_fence_reference(struct pipe_screen *screen,
+                    struct pipe_fence_handle **ptr,
+                    struct pipe_fence_handle *f)
+{
+   struct swr_fence *fence = swr_fence(f);
+   struct swr_fence *old;
+
+   if (likely(ptr)) {
+      old = swr_fence(*ptr);
+      *ptr = f;
+   } else {
+      old = NULL;
+   }
+
+   if (pipe_reference(&old->reference, &fence->reference))
+      swr_fence_destroy(old);
+}
+
+/*
+ * Wait for the fence to finish.
+ *
+ * Yields the CPU until the fence reports done, then returns TRUE.  The
+ * screen and timeout arguments are not consulted.
+ */
+boolean
+swr_fence_finish(struct pipe_screen *screen,
+                 struct pipe_fence_handle *fence_handle,
+                 uint64_t timeout)
+{
+   struct swr_fence *waited = swr_fence(fence_handle);
+
+   for (;;) {
+      if (swr_is_fence_done(waited))
+         break;
+      sched_yield();
+   }
+
+   return TRUE;
+}
+
+
+/*
+ * Screen timestamp hook: returns the current value of os_time_get_nano().
+ * The screen argument is not used.
+ */
+uint64_t
+swr_get_timestamp(struct pipe_screen *screen)
+{
+   return os_time_get_nano();
+}
+
+
+/*
+ * Install the fence and timestamp entry points on the screen and create
+ * the screen's flush fence.
+ */
+void
+swr_fence_init(struct pipe_screen *p_screen)
+{
+   struct swr_screen *swr_scr = swr_screen(p_screen);
+
+   p_screen->get_timestamp = swr_get_timestamp;
+
+   p_screen->fence_finish = swr_fence_finish;
+   p_screen->fence_reference = swr_fence_reference;
+
+   /*
+    * Create persistent "flush" fence, submitted when swr_flush is called.
+    */
+   swr_scr->flush_fence = swr_fence_create();
+}
diff --git a/src/gallium/drivers/swr/swr_fence.h b/src/gallium/drivers/swr/swr_fence.h
new file mode 100644 (file)
index 0000000..257b240
--- /dev/null
@@ -0,0 +1,70 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_FENCE_H
+#define SWR_FENCE_H
+
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+
+struct pipe_screen;
+
+/*
+ * Driver fence object.  pipe_fence_handle pointers handed to the state
+ * tracker are cast to and from this struct.  A fence counts as done when
+ * read equals write.
+ */
+struct swr_fence {
+   struct pipe_reference reference;
+
+   uint64_t read;  /* set equal to write by the sync completion callback */
+   uint64_t write; /* incremented each time the fence is submitted */
+
+   unsigned id; /* Just for reference */
+};
+
+
+/* Cast an opaque pipe_fence_handle to the driver's swr_fence. */
+static inline struct swr_fence *
+swr_fence(struct pipe_fence_handle *fence)
+{
+   return (struct swr_fence *)fence;
+}
+
+/*
+ * A fence is done when its read counter has caught up with its write
+ * counter.  The fence pointer is dereferenced without a NULL check.
+ */
+static INLINE boolean
+swr_is_fence_done(struct swr_fence *fence)
+{
+   return (fence->read == fence->write);
+}
+
+
+/* Install the fence/timestamp hooks on the screen and create its flush
+ * fence. */
+void swr_fence_init(struct pipe_screen *screen);
+
+/* Allocate a fence with one reference; NULL on allocation failure. */
+struct pipe_fence_handle *swr_fence_create();
+
+/* Point *ptr at f, adjusting reference counts on both fences. */
+void swr_fence_reference(struct pipe_screen *screen,
+                         struct pipe_fence_handle **ptr,
+                         struct pipe_fence_handle *f);
+
+/* Block until the fence is done. */
+boolean swr_fence_finish(struct pipe_screen *screen,
+                         struct pipe_fence_handle *fence_handle,
+                         uint64_t timeout);
+
+/* Mark the fence pending and queue a sync that completes it. */
+void
+swr_fence_submit(struct swr_context *ctx, struct pipe_fence_handle *fence);
+
+/* Current time as reported by os_time_get_nano(). */
+uint64_t swr_get_timestamp(struct pipe_screen *screen);
+
+#endif
diff --git a/src/gallium/drivers/swr/swr_loader.cpp b/src/gallium/drivers/swr/swr_loader.cpp
new file mode 100644 (file)
index 0000000..2113c37
--- /dev/null
@@ -0,0 +1,67 @@
+/****************************************************************************
+ * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "util/u_cpu_detect.h"
+#include "util/u_dl.h"
+#include "swr_public.h"
+
+#include <stdio.h>
+#include <dlfcn.h>
+
+typedef pipe_screen *(*screen_create_proc)(struct sw_winsys *winsys);
+
+struct pipe_screen *
+swr_create_screen(struct sw_winsys *winsys)
+{
+   fprintf(stderr, "SWR detected ");
+
+   util_dl_library *pLibrary = nullptr;
+
+   util_cpu_detect();
+   if (util_cpu_caps.has_avx2) {
+      fprintf(stderr, "AVX2\n");
+      pLibrary = util_dl_open("libswrAVX2.so");
+   } else if (util_cpu_caps.has_avx) {
+      fprintf(stderr, "AVX\n");
+      pLibrary = util_dl_open("libswrAVX.so");
+   } else {
+      fprintf(stderr, "no AVX/AVX2 support.  Aborting!\n");
+      exit(-1);
+   }
+
+   if (!pLibrary) {
+      fprintf(stderr, "SWR library load failure: %s\n", util_dl_error());
+      exit(-1);
+   }
+
+   util_dl_proc pScreenProc = util_dl_get_proc_address(pLibrary, "swr_create_screen");
+
+   if (!pScreenProc) {
+      fprintf(stderr, "SWR library search failure: %s\n", util_dl_error());
+      exit(-1);
+   }
+
+   screen_create_proc pScreenCreate = (screen_create_proc)pScreenProc;
+
+   return pScreenCreate(winsys);
+}
diff --git a/src/gallium/drivers/swr/swr_memory.h b/src/gallium/drivers/swr/swr_memory.h
new file mode 100644 (file)
index 0000000..d116781
--- /dev/null
@@ -0,0 +1,99 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#pragma once
+
+/*
+ * Tile load/store/clear routines.  They are only declared here; the
+ * swr_* inline wrappers in this header look up the surface state in the
+ * draw context and forward to them.
+ */
+void LoadHotTile(
+    SWR_SURFACE_STATE *pSrcSurface,
+    SWR_FORMAT dstFormat,
+    SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+    UINT x, UINT y, uint32_t renderTargetArrayIndex,
+    BYTE *pDstHotTile);
+
+void StoreHotTile(
+    SWR_SURFACE_STATE *pDstSurface,
+    SWR_FORMAT srcFormat,
+    SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+    UINT x, UINT y, uint32_t renderTargetArrayIndex,
+    BYTE *pSrcHotTile);
+
+void StoreHotTileClear(
+    SWR_SURFACE_STATE *pDstSurface,
+    SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+    UINT x,
+    UINT y,
+    const float* pClearColor);
+
+INLINE void
+swr_LoadHotTile(HANDLE hPrivateContext,
+                SWR_FORMAT dstFormat,
+                SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+                UINT x, UINT y,
+                uint32_t renderTargetArrayIndex, BYTE* pDstHotTile)
+{
+   // Grab source surface state from private context
+   swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
+   SWR_SURFACE_STATE *pSrcSurface = &pDC->renderTargets[renderTargetIndex];
+
+   LoadHotTile(pSrcSurface, dstFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pDstHotTile);
+}
+
+INLINE void
+swr_StoreHotTile(HANDLE hPrivateContext,
+                 SWR_FORMAT srcFormat,
+                 SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+                 UINT x, UINT y,
+                 uint32_t renderTargetArrayIndex, BYTE* pSrcHotTile)
+{
+   // Grab destination surface state from private context
+   swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
+   SWR_SURFACE_STATE *pDstSurface = &pDC->renderTargets[renderTargetIndex];
+
+   StoreHotTile(pDstSurface, srcFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pSrcHotTile);
+}
+
+INLINE void
+swr_StoreHotTileClear(HANDLE hPrivateContext,
+                      SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+                      UINT x,
+                      UINT y,
+                      const float* pClearColor)
+{
+   // Grab destination surface state from private context
+   swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;
+   SWR_SURFACE_STATE *pDstSurface = &pDC->renderTargets[renderTargetIndex];
+
+   StoreHotTileClear(pDstSurface, renderTargetIndex, x, y, pClearColor);
+}
+
+/* Table initializers for the load/store/clear tile paths; declared here,
+ * defined elsewhere. */
+void InitSimLoadTilesTable();
+void InitSimStoreTilesTable();
+void InitSimClearTilesTable();
+
+/* Init Load/Store/ClearTiles Tables */
+/* Runs the three table initializers, in load, store, clear order. */
+INLINE void swr_InitMemoryModule()
+{
+   InitSimLoadTilesTable();
+   InitSimStoreTilesTable();
+   InitSimClearTilesTable();
+}
diff --git a/src/gallium/drivers/swr/swr_public.h b/src/gallium/drivers/swr/swr_public.h
new file mode 100644 (file)
index 0000000..0814c3b
--- /dev/null
@@ -0,0 +1,46 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_PUBLIC_H
+#define SWR_PUBLIC_H
+
+/*
+ * Forward declarations for every struct named in the prototypes below.
+ * pipe_resource is included so that a C consumer does not end up declaring
+ * it with prototype scope inside swr_get_displaytarget's parameter list.
+ */
+struct pipe_screen;
+struct pipe_resource;
+struct sw_winsys;
+struct sw_displaytarget;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Create the SWR screen on top of the given software winsys. */
+struct pipe_screen *swr_create_screen(struct sw_winsys *winsys);
+
+/* Accessor for the winsys associated with a screen. */
+struct sw_winsys *swr_get_winsys(struct pipe_screen *pipe);
+
+/* Accessor for the display target associated with a resource. */
+struct sw_displaytarget *swr_get_displaytarget(struct pipe_resource *resource);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp
new file mode 100644 (file)
index 0000000..2510b3a
--- /dev/null
@@ -0,0 +1,334 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "os/os_time.h"
+#include "swr_context.h"
+#include "swr_fence.h"
+#include "swr_query.h"
+#include "swr_screen.h"
+#include "swr_state.h"
+
+
+/* Cast an opaque pipe_query to the driver's swr_query. */
+static struct swr_query *
+swr_query(struct pipe_query *p)
+{
+   return (struct swr_query *)p;
+}
+
+/*
+ * Allocate a zeroed query object and record its type and index.
+ * Returns NULL when allocation fails.  The pipe argument is not used.
+ */
+static struct pipe_query *
+swr_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
+{
+   assert(type < PIPE_QUERY_TYPES);
+   assert(index < MAX_SO_STREAMS);
+
+   struct swr_query *query = CALLOC_STRUCT(swr_query);
+   if (!query)
+      return NULL;
+
+   query->type = type;
+   query->index = index;
+
+   return (struct pipe_query *)query;
+}
+
+
+/*
+ * Release a query.  If it still holds a fence that has not completed, the
+ * fence is submitted and waited on before its reference is dropped.
+ */
+static void
+swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct swr_query *query = swr_query(q);
+
+   if (query->fence != NULL) {
+      const boolean pending = !swr_is_fence_done(swr_fence(query->fence));
+      if (pending) {
+         swr_fence_submit(swr_context(pipe), query->fence);
+         swr_fence_finish(pipe->screen, query->fence, 0);
+      }
+      swr_fence_reference(pipe->screen, &query->fence, NULL);
+   }
+
+   FREE(query);
+}
+
+
+// XXX Create a fence callback, rather than stalling SwrWaitForIdle
+static void
+swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   assert(pq->result);
+   union pipe_query_result *result = pq->result;
+   boolean enable_stats = pq->enable_stats;
+   SWR_STATS swr_stats = {0};
+
+   if (pq->fence) {
+      if (!swr_is_fence_done(swr_fence(pq->fence))) {
+         swr_fence_submit(ctx, pq->fence);
+         swr_fence_finish(pipe->screen, pq->fence, 0);
+      }
+      swr_fence_reference(pipe->screen, &pq->fence, NULL);
+   }
+
+   /*
+    * These queries don't need SWR Stats enabled in the core
+    * Set and return.
+    */
+   switch (pq->type) {
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIME_ELAPSED:
+      result->u64 = swr_get_timestamp(pipe->screen);
+      return;
+      break;
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+      /* nothing to do here */
+      return;
+      break;
+   case PIPE_QUERY_GPU_FINISHED:
+      result->b = TRUE; /* XXX TODO Add an api func to SWR to compare drawId
+                           vs LastRetiredId? */
+      return;
+      break;
+   default:
+      /* Any query that needs SwrCore stats */
+      break;
+   }
+
+   /*
+    * All other results are collected from SwrCore counters
+    */
+
+   /* XXX, Should turn this into a fence callback and skip the stall */
+   SwrGetStats(ctx->swrContext, &swr_stats);
+   /* SwrGetStats returns immediately, wait for collection */
+   SwrWaitForIdle(ctx->swrContext);
+
+   switch (pq->type) {
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+      result->u64 = swr_stats.DepthPassCount;
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+      result->u64 = swr_stats.IaPrimitives;
+      break;
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      result->u64 = swr_stats.SoNumPrimsWritten[pq->index];
+      break;
+   case PIPE_QUERY_SO_STATISTICS:
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
+      struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
+      so_stats->num_primitives_written =
+         swr_stats.SoNumPrimsWritten[pq->index];
+      so_stats->primitives_storage_needed =
+         swr_stats.SoPrimStorageNeeded[pq->index];
+   } break;
+   case PIPE_QUERY_PIPELINE_STATISTICS: {
+      struct pipe_query_data_pipeline_statistics *p_stats =
+         &result->pipeline_statistics;
+      p_stats->ia_vertices = swr_stats.IaVertices;
+      p_stats->ia_primitives = swr_stats.IaPrimitives;
+      p_stats->vs_invocations = swr_stats.VsInvocations;
+      p_stats->gs_invocations = swr_stats.GsInvocations;
+      p_stats->gs_primitives = swr_stats.GsPrimitives;
+      p_stats->c_invocations = swr_stats.CPrimitives;
+      p_stats->c_primitives = swr_stats.CPrimitives;
+      p_stats->ps_invocations = swr_stats.PsInvocations;
+      p_stats->hs_invocations = swr_stats.HsInvocations;
+      p_stats->ds_invocations = swr_stats.DsInvocations;
+      p_stats->cs_invocations = swr_stats.CsInvocations;
+   } break;
+   default:
+      assert(0 && "Unsupported query");
+      break;
+   }
+
+   /* Only change stat collection if there are no active queries */
+   if (ctx->active_queries == 0)
+      SwrEnableStats(ctx->swrContext, enable_stats);
+}
+
+
+static boolean
+swr_get_query_result(struct pipe_context *pipe,
+                     struct pipe_query *q,
+                     boolean wait,
+                     union pipe_query_result *result)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   struct swr_query *pq = swr_query(q);
+
+   if (pq->fence) {
+      if (!swr_is_fence_done(swr_fence(pq->fence))) {
+         swr_fence_submit(ctx, pq->fence);
+         if (!wait)
+            return FALSE;
+         swr_fence_finish(pipe->screen, pq->fence, 0);
+      }
+      swr_fence_reference(pipe->screen, &pq->fence, NULL);
+   }
+
+   /* XXX: Need to handle counter rollover */
+
+   switch (pq->type) {
+   /* Booleans */
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      result->b = pq->end.u64 != pq->start.u64 ? TRUE : FALSE;
+      break;
+   case PIPE_QUERY_GPU_FINISHED:
+      result->b = pq->end.b;
+      break;
+   /* Counters */
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIME_ELAPSED:
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      result->u64 = pq->end.u64 - pq->start.u64;
+      break;
+   /* Structures */
+   case PIPE_QUERY_SO_STATISTICS: {
+      struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
+      struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
+      struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
+      so_stats->num_primitives_written =
+         end->num_primitives_written - start->num_primitives_written;
+      so_stats->primitives_storage_needed =
+         end->primitives_storage_needed - start->primitives_storage_needed;
+   } break;
+   case PIPE_QUERY_TIMESTAMP_DISJOINT: {
+      /* os_get_time_nano returns nanoseconds */
+      result->timestamp_disjoint.frequency = UINT64_C(1000000000);
+      result->timestamp_disjoint.disjoint = FALSE;
+   } break;
+   case PIPE_QUERY_PIPELINE_STATISTICS: {
+      struct pipe_query_data_pipeline_statistics *p_stats =
+         &result->pipeline_statistics;
+      struct pipe_query_data_pipeline_statistics *start =
+         &pq->start.pipeline_statistics;
+      struct pipe_query_data_pipeline_statistics *end =
+         &pq->end.pipeline_statistics;
+      p_stats->ia_vertices = end->ia_vertices - start->ia_vertices;
+      p_stats->ia_primitives = end->ia_primitives - start->ia_primitives;
+      p_stats->vs_invocations = end->vs_invocations - start->vs_invocations;
+      p_stats->gs_invocations = end->gs_invocations - start->gs_invocations;
+      p_stats->gs_primitives = end->gs_primitives - start->gs_primitives;
+      p_stats->c_invocations = end->c_invocations - start->c_invocations;
+      p_stats->c_primitives = end->c_primitives - start->c_primitives;
+      p_stats->ps_invocations = end->ps_invocations - start->ps_invocations;
+      p_stats->hs_invocations = end->hs_invocations - start->hs_invocations;
+      p_stats->ds_invocations = end->ds_invocations - start->ds_invocations;
+      p_stats->cs_invocations = end->cs_invocations - start->cs_invocations;
+   } break;
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
+      struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
+      struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
+      uint64_t num_primitives_written =
+         end->num_primitives_written - start->num_primitives_written;
+      uint64_t primitives_storage_needed =
+         end->primitives_storage_needed - start->primitives_storage_needed;
+      result->b = num_primitives_written > primitives_storage_needed;
+   } break;
+   default:
+      assert(0 && "Unsupported query");
+      break;
+   }
+
+   return TRUE;
+}
+
+/*
+ * Start a query: clear both snapshots, record the current counters into
+ * the "start" snapshot and count the query as active on the context.
+ */
+static boolean
+swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct swr_query *query = swr_query(q);
+   struct swr_context *swr = swr_context(pipe);
+
+   /* Both snapshots start out zeroed */
+   memset(&query->end, 0, sizeof(query->end));
+   memset(&query->start, 0, sizeof(query->start));
+
+   /* Capture the baseline; request that stat collection be enabled */
+   query->enable_stats = TRUE;
+   query->result = &query->start;
+   swr_gather_stats(pipe, query);
+   swr->active_queries += 1;
+
+   /* A TIMESTAMP query reports an absolute value, so its baseline is 0 */
+   if (PIPE_QUERY_TIMESTAMP == query->type)
+      query->start.u64 = 0;
+
+   return true;
+}
+
+/*
+ * Finish a query: drop it from the context's active count and record the
+ * current counters into the "end" snapshot.
+ */
+static void
+swr_end_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct swr_query *query = swr_query(q);
+   struct swr_context *ctx = swr_context(pipe);
+
+   assert(ctx->active_queries
+          && "swr_end_query, there are no active queries!");
+   ctx->active_queries -= 1;
+
+   /* Capture the final values; request that stat collection be disabled */
+   query->enable_stats = FALSE;
+   query->result = &query->end;
+   swr_gather_stats(pipe, query);
+}
+
+
+boolean
+swr_check_render_cond(struct pipe_context *pipe)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   boolean b, wait;
+   uint64_t result;
+
+   if (!ctx->render_cond_query)
+      return TRUE; /* no query predicate, draw normally */
+
+   wait = (ctx->render_cond_mode == PIPE_RENDER_COND_WAIT
+           || ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT);
+
+   b = pipe->get_query_result(
+      pipe, ctx->render_cond_query, wait, (union pipe_query_result *)&result);
+   if (b)
+      return (!result == ctx->render_cond_cond);
+   else
+      return TRUE;
+}
+
+/*
+ * Install the query entry points on the pipe context and reset the
+ * context's active-query counter.
+ */
+void
+swr_query_init(struct pipe_context *pipe)
+{
+   /* A fresh context has no queries in flight */
+   swr_context(pipe)->active_queries = 0;
+
+   /* Query object lifetime */
+   pipe->create_query = swr_create_query;
+   pipe->destroy_query = swr_destroy_query;
+
+   /* Query execution and readback */
+   pipe->begin_query = swr_begin_query;
+   pipe->end_query = swr_end_query;
+   pipe->get_query_result = swr_get_query_result;
+}
diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h
new file mode 100644 (file)
index 0000000..836d07b
--- /dev/null
@@ -0,0 +1,46 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_QUERY_H
+#define SWR_QUERY_H
+
+
+#include <limits.h>
+
+/* Driver-side state of a single query object. */
+struct swr_query {
+   unsigned type; /* PIPE_QUERY_* */
+   unsigned index; /* selects the per-index stream output counter */
+
+   /* Snapshot currently being filled: points at start or end below */
+   union pipe_query_result *result;
+   union pipe_query_result start; /* counters captured by begin_query */
+   union pipe_query_result end;   /* counters captured by end_query */
+
+   /* Checked/waited on by get_query_result before the result is computed */
+   struct pipe_fence_handle *fence;
+
+   /* Set TRUE by begin_query and FALSE by end_query */
+   boolean enable_stats;
+};
+
+extern void swr_query_init(struct pipe_context *pipe);
+
+extern boolean swr_check_render_cond(struct pipe_context *pipe);
+#endif
diff --git a/src/gallium/drivers/swr/swr_resource.h b/src/gallium/drivers/swr/swr_resource.h
new file mode 100644 (file)
index 0000000..87a27ac
--- /dev/null
@@ -0,0 +1,97 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_RESOURCE_H
+#define SWR_RESOURCE_H
+
+#include "pipe/p_state.h"
+#include "api.h"
+
+struct sw_displaytarget;
+
+/* SWR driver view of a gallium resource (buffer or texture). */
+struct swr_resource {
+   /* Must stay first: swr_resource() casts a pipe_resource pointer to this */
+   struct pipe_resource base;
+
+   /* Whether the resource format carries depth / stencil data */
+   bool has_depth;
+   bool has_stencil;
+
+   /* Dimensions of mip level 0 after any alignment applied at layout time */
+   UINT alignedWidth;
+   UINT alignedHeight;
+
+   SWR_SURFACE_STATE swr;
+   SWR_SURFACE_STATE secondary; // for faking depth/stencil merged formats
+
+   /* Winsys display target backing this resource, when it has one */
+   struct sw_displaytarget *display_target;
+
+   /* Per mip level: bytes per row, bytes per image, offset of the level */
+   unsigned row_stride[PIPE_MAX_TEXTURE_LEVELS];
+   unsigned img_stride[PIPE_MAX_TEXTURE_LEVELS];
+   unsigned mip_offsets[PIPE_MAX_TEXTURE_LEVELS];
+
+   /* Opaque pointer to swr_context to mark resource in use */
+   void *bound_to_context;
+};
+
+
+/* Downcast a generic pipe_resource to the driver's swr_resource. */
+static INLINE struct swr_resource *
+swr_resource(struct pipe_resource *resource)
+{
+   struct swr_resource *res = (struct swr_resource *)resource;
+
+   return res;
+}
+
+/*
+ * Tell whether a resource is a texture (any PIPE_TEXTURE_* target) as
+ * opposed to a plain buffer.  Unknown targets assert and report FALSE.
+ */
+static INLINE boolean
+swr_resource_is_texture(const struct pipe_resource *resource)
+{
+   boolean is_texture = FALSE;
+
+   switch (resource->target) {
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_3D:
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      is_texture = TRUE;
+      break;
+   case PIPE_BUFFER:
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   return is_texture;
+}
+
+
+/*
+ * Return the base address of a buffer resource's storage.
+ * Only valid for non-texture resources (asserted).
+ */
+static INLINE void *
+swr_resource_data(struct pipe_resource *resource)
+{
+   assert(!swr_resource_is_texture(resource));
+
+   return swr_resource(resource)->swr.pBaseAddress;
+}
+
+
+void swr_store_render_target(struct swr_context *ctx,
+                             uint32_t attachment,
+                             enum SWR_TILE_STATE post_tile_state);
+#endif
diff --git a/src/gallium/drivers/swr/swr_scratch.cpp b/src/gallium/drivers/swr/swr_scratch.cpp
new file mode 100644 (file)
index 0000000..e6c448c
--- /dev/null
@@ -0,0 +1,116 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "util/u_memory.h"
+#include "swr_context.h"
+#include "swr_scratch.h"
+#include "api.h"
+
+
+void *
+swr_copy_to_scratch_space(struct swr_context *ctx,
+                          struct swr_scratch_space *space,
+                          const void *user_buffer,
+                          unsigned int size)
+{
+   void *ptr;
+   assert(space);
+   assert(user_buffer);
+   assert(size);
+
+   if (size >= 2048) { /* XXX TODO create KNOB_ for this */
+      /* Use per draw SwrAllocDrawContextMemory for larger copies */
+      ptr = SwrAllocDrawContextMemory(ctx->swrContext, size, 4);
+   } else {
+      /* Allocate enough so that MAX_DRAWS_IN_FLIGHT sets fit. */
+      unsigned int max_size_in_flight = size * KNOB_MAX_DRAWS_IN_FLIGHT;
+
+      /* Need to grow space */
+      if (max_size_in_flight > space->current_size) {
+         /* Must idle the pipeline, this is infrequent */
+         SwrWaitForIdle(ctx->swrContext);
+
+         space->current_size = max_size_in_flight;
+
+         if (space->base) {
+            align_free(space->base);
+            space->base = NULL;
+         }
+
+         if (!space->base) {
+            space->base = (BYTE *)align_malloc(space->current_size, 4);
+            space->head = (void *)space->base;
+         }
+      }
+
+      /* Wrap */
+      if (((BYTE *)space->head + size)
+          >= ((BYTE *)space->base + space->current_size)) {
+         /*
+          * TODO XXX: Should add a fence on wrap.  Assumption is that
+          * current_space >> size, and there are at least MAX_DRAWS_IN_FLIGHT
+          * draws in scratch.  So fence would always be met on wrap.  A fence
+          * would ensure that first frame in buffer is done before wrapping.
+          * If fence ever needs to be waited on, can increase buffer size.
+          * So far in testing, this hasn't been necessary.
+          */
+         space->head = space->base;
+      }
+
+      ptr = space->head;
+      space->head = (BYTE *)space->head + size;
+   }
+
+   /* Copy user_buffer to scratch */
+   memcpy(ptr, user_buffer, size);
+
+   return ptr;
+}
+
+
+/* Allocate the zero-initialised scratch buffer set for a context. */
+void
+swr_init_scratch_buffers(struct swr_context *ctx)
+{
+   ctx->scratch = CALLOC_STRUCT(swr_scratch_buffers);
+}
+
+/*
+ * Release every scratch ring owned by the context, then the container
+ * itself.  Does nothing when the context has no scratch buffers.
+ */
+void
+swr_destroy_scratch_buffers(struct swr_context *ctx)
+{
+   struct swr_scratch_buffers *bufs = ctx->scratch;
+
+   if (!bufs)
+      return;
+
+   struct swr_scratch_space *spaces[] = {
+      &bufs->vs_constants,
+      &bufs->fs_constants,
+      &bufs->vertex_buffer,
+      &bufs->index_buffer,
+   };
+
+   for (unsigned i = 0; i < sizeof(spaces) / sizeof(spaces[0]); i++) {
+      if (spaces[i]->base)
+         align_free(spaces[i]->base);
+   }
+
+   FREE(bufs);
+}
diff --git a/src/gallium/drivers/swr/swr_scratch.h b/src/gallium/drivers/swr/swr_scratch.h
new file mode 100644 (file)
index 0000000..74218d6
--- /dev/null
@@ -0,0 +1,63 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_SCRATCH_H
+#define SWR_SCRATCH_H
+
+/* One scratch ring: a single allocation handed out front to back. */
+struct swr_scratch_space {
+   void *head; /* where the next copy will be placed */
+   unsigned int current_size; /* size in bytes of the allocation at base */
+   /* TODO XXX: Add a fence for wrap condition. */
+
+   void *base; /* start of the allocation, NULL until first use */
+};
+
+/* The set of scratch rings owned by a context, one per kind of data. */
+struct swr_scratch_buffers {
+   struct swr_scratch_space vs_constants;
+   struct swr_scratch_space fs_constants;
+   struct swr_scratch_space vertex_buffer;
+   struct swr_scratch_space index_buffer;
+};
+
+
+/*
+ * swr_copy_to_scratch_space
+ * Copies size bytes of user_buffer into the scratch ring buffer.
+ * Used to store temporary data such as client arrays and constants.
+ *
+ * Inputs:
+ *   space ptr to scratch pool (vs_constants, fs_constants)
+ *   user_buffer, data to copy into scratch space
+ *   size to be copied
+ * Returns:
+ *   pointer to data copied to scratch space.
+ */
+void *swr_copy_to_scratch_space(struct swr_context *ctx,
+                                struct swr_scratch_space *space,
+                                const void *user_buffer,
+                                unsigned int size);
+
+void swr_init_scratch_buffers(struct swr_context *ctx);
+void swr_destroy_scratch_buffers(struct swr_context *ctx);
+
+#endif
diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
new file mode 100644 (file)
index 0000000..f0d48cd
--- /dev/null
@@ -0,0 +1,746 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "pipe/p_screen.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_cpu_detect.h"
+
+#include "state_tracker/sw_winsys.h"
+
+extern "C" {
+#include "gallivm/lp_bld_limits.h"
+}
+
+#include "swr_public.h"
+#include "swr_screen.h"
+#include "swr_context.h"
+#include "swr_resource.h"
+#include "swr_fence.h"
+#include "gen_knobs.h"
+
+#include "jit_api.h"
+
+#include <stdio.h>
+
+/* MSVC case insensitive compare */
+#if defined(PIPE_CC_MSVC)
+   #define strcasecmp lstrcmpiA  
+#endif
+
+/*
+ * Max texture sizes
+ * XXX Check max texture size values against core and sampler.
+ */
+#define SWR_MAX_TEXTURE_SIZE (4 * 1024 * 1024 * 1024ULL) /* 4GB */
+#define SWR_MAX_TEXTURE_2D_LEVELS 14  /* 8K x 8K for now */
+#define SWR_MAX_TEXTURE_3D_LEVELS 12  /* 2K x 2K x 2K for now */
+#define SWR_MAX_TEXTURE_CUBE_LEVELS 14  /* 8K x 8K for now */
+#define SWR_MAX_TEXTURE_ARRAY_LAYERS 512 /* 8K x 512 / 8K x 8K x 512 */
+
+/* Report the driver name; the screen argument is not consulted. */
+static const char *
+swr_get_name(struct pipe_screen *screen)
+{
+   static const char *const driver_name = "SWR";
+
+   return driver_name;
+}
+
+/* Report the driver vendor; the screen argument is not consulted. */
+static const char *
+swr_get_vendor(struct pipe_screen *screen)
+{
+   static const char *const vendor_name = "Intel Corporation";
+
+   return vendor_name;
+}
+
+static boolean
+swr_is_format_supported(struct pipe_screen *screen,
+                        enum pipe_format format,
+                        enum pipe_texture_target target,
+                        unsigned sample_count,
+                        unsigned bind)
+{
+   struct sw_winsys *winsys = swr_screen(screen)->winsys;
+   const struct util_format_description *format_desc;
+
+   assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D
+          || target == PIPE_TEXTURE_1D_ARRAY
+          || target == PIPE_TEXTURE_2D
+          || target == PIPE_TEXTURE_2D_ARRAY
+          || target == PIPE_TEXTURE_RECT
+          || target == PIPE_TEXTURE_3D
+          || target == PIPE_TEXTURE_CUBE
+          || target == PIPE_TEXTURE_CUBE_ARRAY);
+
+   format_desc = util_format_description(format);
+   if (!format_desc)
+      return FALSE;
+
+   if (sample_count > 1)
+      return FALSE;
+
+   if (bind
+       & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) {
+      if (!winsys->is_displaytarget_format_supported(winsys, bind, format))
+         return FALSE;
+   }
+
+   if (bind & PIPE_BIND_RENDER_TARGET) {
+      if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
+         return FALSE;
+
+      if (mesa_to_swr_format(format) == (SWR_FORMAT)-1)
+         return FALSE;
+
+      /*
+       * Although possible, it is unnatural to render into compressed or YUV
+       * surfaces. So disable these here to avoid going into weird paths
+       * inside the state trackers.
+       */
+      if (format_desc->block.width != 1 || format_desc->block.height != 1)
+         return FALSE;
+   }
+
+   if (bind & PIPE_BIND_DEPTH_STENCIL) {
+      if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+         return FALSE;
+
+      if (mesa_to_swr_format(format) == (SWR_FORMAT)-1)
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+/*
+ * Report the integer capabilities of the screen.
+ *
+ * Each PIPE_CAP_* handled below returns a fixed value, except
+ * PIPE_CAP_VIDEO_MEMORY which reports total physical memory in megabytes
+ * (or 0 when it cannot be determined).  The switch deliberately has no
+ * default label: a cap that is not listed falls out of the switch, is
+ * logged through debug_printf and reported as 0.
+ */
+static int
+swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
+{
+   switch (param) {
+   case PIPE_CAP_NPOT_TEXTURES:
+   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+      return 1;
+   case PIPE_CAP_TWO_SIDED_STENCIL:
+      return 1;
+   case PIPE_CAP_SM3:
+      return 1;
+   case PIPE_CAP_ANISOTROPIC_FILTER:
+      return 0;
+   case PIPE_CAP_POINT_SPRITE:
+      return 1;
+   case PIPE_CAP_MAX_RENDER_TARGETS:
+      return PIPE_MAX_COLOR_BUFS;
+   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+      return 1;
+   case PIPE_CAP_OCCLUSION_QUERY:
+   case PIPE_CAP_QUERY_TIME_ELAPSED:
+   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+      return 1;
+   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+      return 1;
+   case PIPE_CAP_TEXTURE_SHADOW_MAP:
+      return 1;
+   case PIPE_CAP_TEXTURE_SWIZZLE:
+      return 1;
+   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+      return 0;
+   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+      return SWR_MAX_TEXTURE_2D_LEVELS;
+   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+      return SWR_MAX_TEXTURE_3D_LEVELS;
+   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+      return SWR_MAX_TEXTURE_CUBE_LEVELS;
+   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+      return 1;
+   case PIPE_CAP_INDEP_BLEND_ENABLE:
+      return 1;
+   case PIPE_CAP_INDEP_BLEND_FUNC:
+      return 1;
+   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+      return 0; // Don't support lower left frag coord.
+   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+      return 1;
+   case PIPE_CAP_DEPTH_CLIP_DISABLE:
+      return 1;
+   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+      return MAX_SO_STREAMS;
+   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+      return MAX_ATTRIBUTES;
+   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+      return 1024;
+   case PIPE_CAP_MAX_VERTEX_STREAMS:
+      return 1;
+   case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+      return 2048;
+   case PIPE_CAP_PRIMITIVE_RESTART:
+      return 1;
+   case PIPE_CAP_SHADER_STENCIL_EXPORT:
+      return 1;
+   case PIPE_CAP_TGSI_INSTANCEID:
+   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+   case PIPE_CAP_START_INSTANCE:
+      return 1;
+   case PIPE_CAP_SEAMLESS_CUBE_MAP:
+   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+      return 1;
+   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+      return SWR_MAX_TEXTURE_ARRAY_LAYERS;
+   case PIPE_CAP_MIN_TEXEL_OFFSET:
+      return -8;
+   case PIPE_CAP_MAX_TEXEL_OFFSET:
+      return 7;
+   case PIPE_CAP_CONDITIONAL_RENDER:
+      return 1;
+   case PIPE_CAP_TEXTURE_BARRIER:
+      return 0;
+   case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: /* draw module */
+   case PIPE_CAP_VERTEX_COLOR_CLAMPED: /* draw module */
+      return 1;
+   case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+      return 1;
+   case PIPE_CAP_GLSL_FEATURE_LEVEL:
+      return 330;
+   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+      return 0;
+   case PIPE_CAP_COMPUTE:
+      return 0;
+   case PIPE_CAP_USER_VERTEX_BUFFERS:
+   case PIPE_CAP_USER_INDEX_BUFFERS:
+   case PIPE_CAP_USER_CONSTANT_BUFFERS:
+   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+   case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+      return 1;
+   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+      return 16;
+   case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_TEXTURE_MULTISAMPLE:
+      return 0;
+   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+      return 64;
+   case PIPE_CAP_QUERY_TIMESTAMP:
+      return 1;
+   case PIPE_CAP_CUBE_MAP_ARRAY:
+      return 0;
+   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+      return 1;
+   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+      return 65536;
+   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+      return 0;
+   case PIPE_CAP_TGSI_TEXCOORD:
+   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+      return 0;
+   case PIPE_CAP_MAX_VIEWPORTS:
+      return 1;
+   case PIPE_CAP_ENDIANNESS:
+      return PIPE_ENDIAN_NATIVE;
+   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+   case PIPE_CAP_TEXTURE_GATHER_SM5:
+      return 0;
+   case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+      return 1;
+   case PIPE_CAP_TEXTURE_QUERY_LOD:
+   case PIPE_CAP_SAMPLE_SHADING:
+   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+   case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+   case PIPE_CAP_SAMPLER_VIEW_TARGET:
+      return 0;
+   case PIPE_CAP_FAKE_SW_MSAA:
+      return 1;
+   case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+      return 0;
+   case PIPE_CAP_DRAW_INDIRECT:
+      return 1;
+
+   case PIPE_CAP_VENDOR_ID:
+      return 0xFFFFFFFF;
+   case PIPE_CAP_DEVICE_ID:
+      return 0xFFFFFFFF;
+   case PIPE_CAP_ACCELERATED:
+      return 0;
+   case PIPE_CAP_VIDEO_MEMORY: {
+      /* XXX: Do we want to return the full amount of system memory ? */
+      uint64_t system_memory;
+
+      if (!os_get_total_physical_memory(&system_memory))
+         return 0;
+
+      /* Shift by 20 converts bytes to megabytes */
+      return (int)(system_memory >> 20);
+   }
+   case PIPE_CAP_UMA:
+      return 1;
+   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+      return 1;
+   case PIPE_CAP_CLIP_HALFZ:
+      return 1;
+   case PIPE_CAP_VERTEXID_NOBASE:
+      return 0;
+   case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+      return 1;
+   case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+      return 0;
+   case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+      return 0; // xxx
+   case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+      return 0;
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+      return 0;
+   case PIPE_CAP_DEPTH_BOUNDS_TEST:
+      return 0; // xxx
+   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+      return 1;
+   case PIPE_CAP_TGSI_TXQS:
+   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+   case PIPE_CAP_SHAREABLE_SHADERS:
+   case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+   case PIPE_CAP_CLEAR_TEXTURE:
+   case PIPE_CAP_DRAW_PARAMETERS:
+   case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+   case PIPE_CAP_MULTI_DRAW_INDIRECT:
+   case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+   case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+   case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+   case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+   case PIPE_CAP_INVALIDATE_BUFFER:
+   case PIPE_CAP_GENERATE_MIPMAP:
+   case PIPE_CAP_STRING_MARKER:
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+   case PIPE_CAP_QUERY_BUFFER_OBJECT:
+   case PIPE_CAP_QUERY_MEMORY_INFO:
+      return 0;
+   }
+
+   /* should only get here on unhandled cases */
+   debug_printf("Unexpected PIPE_CAP %d query\n", param);
+   return 0;
+}
+
+/*
+ * Report a shader capability.  Vertex and fragment stages use the gallivm
+ * limits; every other stage reports 0.
+ */
+static int
+swr_get_shader_param(struct pipe_screen *screen,
+                     unsigned shader,
+                     enum pipe_shader_cap param)
+{
+   switch (shader) {
+   case PIPE_SHADER_VERTEX:
+   case PIPE_SHADER_FRAGMENT:
+      return gallivm_get_shader_param(param);
+   default:
+      // Todo: geometry, tessellation, compute
+      return 0;
+   }
+}
+
+
+static float
+swr_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
+{
+   switch (param) {
+   case PIPE_CAPF_MAX_LINE_WIDTH:
+   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+   case PIPE_CAPF_MAX_POINT_WIDTH:
+      return 255.0; /* arbitrary */
+   case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+      return 0.0;
+   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+      return 0.0;
+   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+      return 0.0;
+   case PIPE_CAPF_GUARD_BAND_LEFT:
+   case PIPE_CAPF_GUARD_BAND_TOP:
+   case PIPE_CAPF_GUARD_BAND_RIGHT:
+   case PIPE_CAPF_GUARD_BAND_BOTTOM:
+      return 0.0;
+   }
+   /* should only get here on unhandled cases */
+   debug_printf("Unexpected PIPE_CAPF %d query\n", param);
+   return 0.0;
+}
+
+SWR_FORMAT
+mesa_to_swr_format(enum pipe_format format)
+{
+   const struct util_format_description *format_desc =
+      util_format_description(format);
+   if (!format_desc)
+      return (SWR_FORMAT)-1;
+
+   // more robust check would be comparing all attributes of the formats
+   // luckily format names are mostly standardized
+   for (int i = 0; i < NUM_SWR_FORMATS; i++) {
+      const SWR_FORMAT_INFO &swr_desc = GetFormatInfo((SWR_FORMAT)i);
+
+      if (!strcasecmp(format_desc->short_name, swr_desc.name))
+         return (SWR_FORMAT)i;
+   }
+
+   // ... with some exceptions
+   switch (format) {
+   case PIPE_FORMAT_R8G8B8A8_SRGB:
+      return R8G8B8A8_UNORM_SRGB;
+   case PIPE_FORMAT_B8G8R8A8_SRGB:
+      return B8G8R8A8_UNORM_SRGB;
+   case PIPE_FORMAT_I8_UNORM:
+      return R8_UNORM;
+   case PIPE_FORMAT_Z16_UNORM:
+      return R16_UNORM;
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+      return R24_UNORM_X8_TYPELESS;
+   case PIPE_FORMAT_Z32_FLOAT:
+      return R32_FLOAT;
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      return R32_FLOAT_X8X24_TYPELESS;
+   case PIPE_FORMAT_L8A8_UNORM:
+      return R8G8_UNORM;
+   default:
+      break;
+   }
+
+   debug_printf("asked to convert unsupported format %s\n",
+                format_desc->name);
+   return (SWR_FORMAT)-1;
+}
+
+/*
+ * Back a displayable resource with a winsys display target.
+ *
+ * Creates the display target from the resource's bind flags, format and
+ * aligned dimensions, records it (and its mapped address) in res, and zeroes
+ * the surface when the mapping succeeded.
+ *
+ * Returns FALSE if the winsys could not create the display target, TRUE
+ * otherwise.
+ */
+static boolean
+swr_displaytarget_layout(struct swr_screen *screen, struct swr_resource *res)
+{
+   struct sw_winsys *ws = screen->winsys;
+   UINT row_pitch;
+
+   struct sw_displaytarget *target =
+      ws->displaytarget_create(ws,
+                               res->base.bind,
+                               res->base.format,
+                               res->alignedWidth,
+                               res->alignedHeight,
+                               64, NULL,
+                               &row_pitch);
+   if (!target)
+      return FALSE;
+
+   void *pixels = ws->displaytarget_map(ws, target, 0);
+
+   res->display_target = target;
+   res->swr.pBaseAddress = (uint8_t*) pixels;
+
+   /* Start from a cleared surface; skipped when the map failed. */
+   if (pixels != NULL)
+      memset(pixels, 0, res->alignedHeight * row_pitch);
+
+   ws->displaytarget_unmap(ws, target);
+
+   return TRUE;
+}
+
+/*
+ * Compute the memory layout of a resource and fill in its SWR surface state.
+ *
+ * For every mip level up to pt->last_level this records the row stride,
+ * image stride and byte offset of the level.  Levels of render-target or
+ * depth/stencil resources are padded to macrotile dimensions.
+ *
+ * screen   - owning screen (not used by the layout computation itself)
+ * res      - resource whose res->base template has already been filled in
+ * allocate - when true, also allocate the primary surface and, for combined
+ *            depth+stencil formats, a separate R8_UINT stencil surface
+ *
+ * Returns FALSE when the accumulated size exceeds SWR_MAX_TEXTURE_SIZE,
+ * TRUE otherwise.
+ */
+static boolean
+swr_texture_layout(struct swr_screen *screen,
+                   struct swr_resource *res,
+                   boolean allocate)
+{
+   struct pipe_resource *pt = &res->base;
+
+   pipe_format fmt = pt->format;
+   const struct util_format_description *desc = util_format_description(fmt);
+
+   res->has_depth = util_format_has_depth(desc);
+   res->has_stencil = util_format_has_stencil(desc);
+
+   /* Stencil-only formats are stored as plain 8-bit unsigned data. */
+   if (res->has_stencil && !res->has_depth)
+      fmt = PIPE_FORMAT_R8_UINT;
+
+   res->swr.width = pt->width0;
+   res->swr.height = pt->height0;
+   res->swr.depth = pt->depth0;
+   res->swr.type = swr_convert_target_type(pt->target);
+   res->swr.tileMode = SWR_TILE_NONE;
+   res->swr.format = mesa_to_swr_format(fmt);
+   res->swr.numSamples = (1 << pt->nr_samples);
+
+   SWR_FORMAT_INFO finfo = GetFormatInfo(res->swr.format);
+
+   unsigned total_size = 0;
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
+   unsigned depth = pt->depth0;
+   unsigned layers = pt->array_size;
+
+   for (int level = 0; level <= pt->last_level; level++) {
+      unsigned alignedWidth, alignedHeight;
+      unsigned num_slices;
+
+      if (pt->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) {
+         alignedWidth = align(width, KNOB_MACROTILE_X_DIM);
+         alignedHeight = align(height, KNOB_MACROTILE_Y_DIM);
+      } else {
+         alignedWidth = width;
+         alignedHeight = height;
+      }
+
+      /* The base level's padded size describes the whole resource. */
+      if (level == 0) {
+         res->alignedWidth = alignedWidth;
+         res->alignedHeight = alignedHeight;
+      }
+
+      res->row_stride[level] = alignedWidth * finfo.Bpp;
+      res->img_stride[level] = res->row_stride[level] * alignedHeight;
+      res->mip_offsets[level] = total_size;
+
+      /* 3D textures shrink in depth per level; arrays keep their layers. */
+      if (pt->target == PIPE_TEXTURE_3D)
+         num_slices = depth;
+      else if (pt->target == PIPE_TEXTURE_1D_ARRAY
+               || pt->target == PIPE_TEXTURE_2D_ARRAY
+               || pt->target == PIPE_TEXTURE_CUBE
+               || pt->target == PIPE_TEXTURE_CUBE_ARRAY)
+         num_slices = layers;
+      else
+         num_slices = 1;
+
+      total_size += res->img_stride[level] * num_slices;
+      if (total_size > SWR_MAX_TEXTURE_SIZE)
+         return FALSE;
+
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
+   }
+
+   res->swr.halign = res->alignedWidth;
+   res->swr.valign = res->alignedHeight;
+   res->swr.pitch = res->row_stride[0];
+
+   if (allocate) {
+      res->swr.pBaseAddress = (BYTE *)_aligned_malloc(total_size, 64);
+
+      if (res->has_depth && res->has_stencil) {
+         res->secondary.width = pt->width0;
+         res->secondary.height = pt->height0;
+         res->secondary.depth = pt->depth0;
+         res->secondary.type = SURFACE_2D;
+         res->secondary.tileMode = SWR_TILE_NONE;
+         res->secondary.format = R8_UINT;
+         res->secondary.numSamples = (1 << pt->nr_samples);
+
+         /* Look up the format info only after secondary.format has been
+          * assigned; querying it earlier sizes the stencil surface from
+          * whatever format value was previously stored there. */
+         SWR_FORMAT_INFO stencil_info =
+            GetFormatInfo(res->secondary.format);
+         res->secondary.pitch = res->alignedWidth * stencil_info.Bpp;
+
+         res->secondary.pBaseAddress = (BYTE *)_aligned_malloc(
+            res->alignedHeight * res->secondary.pitch, 64);
+      }
+   }
+
+   return TRUE;
+}
+
+/*
+ * pipe_screen::can_create_resource hook.
+ *
+ * Runs the layout computation on a zeroed scratch resource built from the
+ * template, without allocating storage, and reports whether it succeeded.
+ */
+static boolean
+swr_can_create_resource(struct pipe_screen *screen,
+                        const struct pipe_resource *templat)
+{
+   struct swr_resource probe;
+
+   memset(&probe, 0, sizeof(probe));
+   probe.base = *templat;
+
+   boolean fits = swr_texture_layout(swr_screen(screen), &probe, false);
+   return fits;
+}
+
+/*
+ * pipe_screen::resource_create hook.
+ *
+ * Allocates a swr_resource from the template and lays it out:
+ *  - displayable textures get their surface state computed without
+ *    allocation and are then backed by a winsys display target;
+ *  - all other textures and buffers are allocated by swr_texture_layout.
+ *
+ * Returns the new resource, or NULL if allocation or layout failed.
+ */
+static struct pipe_resource *
+swr_resource_create(struct pipe_screen *_screen,
+                    const struct pipe_resource *templat)
+{
+   struct swr_screen *screen = swr_screen(_screen);
+   struct swr_resource *res = CALLOC_STRUCT(swr_resource);
+   if (!res)
+      return NULL;
+
+   res->base = *templat;
+   pipe_reference_init(&res->base.reference, 1);
+   res->base.screen = &screen->base;
+
+   if (swr_resource_is_texture(&res->base)) {
+      if (res->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT
+                            | PIPE_BIND_SHARED)) {
+         /* displayable surface
+          * first call swr_texture_layout without allocating to finish
+          * filling out the SWR_SURFACE_STATE in res.  A failed layout
+          * leaves halign/valign/pitch unset, so do not go on to create
+          * a display target in that case. */
+         if (!swr_texture_layout(screen, res, false))
+            goto fail;
+         if (!swr_displaytarget_layout(screen, res))
+            goto fail;
+      } else {
+         /* texture map */
+         if (!swr_texture_layout(screen, res, true))
+            goto fail;
+      }
+   } else {
+      /* other data (vertex buffer, const buffer, etc) */
+      assert(util_format_get_blocksize(templat->format) == 1);
+      assert(templat->height0 == 1);
+      assert(templat->depth0 == 1);
+      assert(templat->last_level == 0);
+
+      /* Easiest to just call swr_texture_layout, as it sets up
+       * SWR_SURFACE_STATE in res */
+      if (!swr_texture_layout(screen, res, true))
+         goto fail;
+   }
+
+   return &res->base;
+
+fail:
+   FREE(res);
+   return NULL;
+}
+
+/*
+ * pipe_screen::resource_destroy hook.
+ *
+ * Waits for the rasterizer to go idle when a non-display resource is still
+ * bound to a context, releases the primary surface (through the winsys for
+ * display targets, directly otherwise), then the secondary surface and the
+ * resource structure itself.
+ */
+static void
+swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt)
+{
+   struct swr_resource *res = swr_resource(pt);
+   struct swr_screen *screen = swr_screen(p_screen);
+
+   /*
+    * A resource attached to a context may still be in use, so drain the
+    * rasterizer before freeing it.
+    * XXX TODO: don't use SwrWaitForIdle, use fences and come up with a real
+    * resource manager.
+    * XXX A swr_destroy has been seen to arrive before the framebuffer
+    * resource is freed, so display targets are never waited on.
+    */
+   if (!res->display_target && res->bound_to_context) {
+      struct swr_context *bound =
+         swr_context((pipe_context *)res->bound_to_context);
+      // XXX, don't SwrWaitForIdle!!! Use a fence.
+      SwrWaitForIdle(bound->swrContext);
+   }
+
+   /*
+    * Release the primary surface.  Display-target memory is handed back
+    * through the winsys displaytarget_destroy call rather than freed here.
+    */
+   if (!res->display_target) {
+      _aligned_free(res->swr.pBaseAddress);
+   } else {
+      struct sw_winsys *ws = screen->winsys;
+      ws->displaytarget_destroy(ws, res->display_target);
+   }
+
+   _aligned_free(res->secondary.pBaseAddress);
+
+   FREE(res);
+}
+
+
+/*
+ * pipe_screen::flush_frontbuffer hook.
+ *
+ * Waits for the screen's flush fence, signals end-of-frame to the
+ * rasterizer context the resource is bound to, and asks the winsys to
+ * present the resource's display target.
+ *
+ * level and layer are not used by this implementation.
+ */
+static void
+swr_flush_frontbuffer(struct pipe_screen *p_screen,
+                      struct pipe_resource *resource,
+                      unsigned level,
+                      unsigned layer,
+                      void *context_private,
+                      struct pipe_box *sub_box)
+{
+   struct swr_screen *screen = swr_screen(p_screen);
+   struct sw_winsys *winsys = screen->winsys;
+   struct swr_resource *res = swr_resource(resource);
+   struct pipe_context *pipe = (pipe_context *)res->bound_to_context;
+
+   /* Ensure fence set at flush is finished, before reading frame buffer */
+   swr_fence_finish(p_screen, screen->flush_fence, 0);
+
+   /* The Swr* entry points operate on the rasterizer handle stored in
+    * swr_context::swrContext (as SwrWaitForIdle is called in
+    * swr_resource_destroy), not on the gallium context wrapper.  Skip the
+    * call entirely when the resource was never bound to a context. */
+   if (pipe)
+      SwrEndFrame(swr_context(pipe)->swrContext);
+
+   assert(res->display_target);
+   if (res->display_target)
+      winsys->displaytarget_display(
+         winsys, res->display_target, context_private, sub_box);
+}
+
+
+/*
+ * pipe_screen::destroy hook.
+ *
+ * Waits for and drops the flush fence, tears down the JIT manager, lets the
+ * winsys destroy itself if it provides a destroy callback, and finally
+ * frees the screen.
+ */
+static void
+swr_destroy_screen(struct pipe_screen *p_screen)
+{
+   struct swr_screen *swr = swr_screen(p_screen);
+   struct sw_winsys *ws = swr->winsys;
+
+   fprintf(stderr, "SWR destroy screen!\n");
+
+   /* Let outstanding work finish before releasing the fence. */
+   swr_fence_finish(p_screen, swr->flush_fence, 0);
+   swr_fence_reference(p_screen, &swr->flush_fence, NULL);
+
+   JitDestroyContext(swr->hJitMgr);
+
+   if (ws->destroy != NULL)
+      ws->destroy(ws);
+
+   FREE(swr);
+}
+
+/*
+ * Create the SWR pipe_screen on top of the given software winsys.
+ *
+ * Installs the screen callbacks, creates the JIT manager and initializes
+ * fence support.  Returns NULL if the screen could not be allocated.
+ */
+PUBLIC
+struct pipe_screen *
+swr_create_screen(struct sw_winsys *winsys)
+{
+   struct swr_screen *screen = CALLOC_STRUCT(swr_screen);
+   if (screen == NULL)
+      return NULL;
+
+   /* Apply the driver default only when the environment does not set it. */
+   if (getenv("KNOB_MAX_PRIMS_PER_DRAW") == NULL)
+      g_GlobalKnobs.MAX_PRIMS_PER_DRAW.Value(49152);
+
+   screen->winsys = winsys;
+
+   struct pipe_screen *base = &screen->base;
+
+   /* Identification and capability queries. */
+   base->get_name = swr_get_name;
+   base->get_vendor = swr_get_vendor;
+   base->is_format_supported = swr_is_format_supported;
+   base->get_param = swr_get_param;
+   base->get_shader_param = swr_get_shader_param;
+   base->get_paramf = swr_get_paramf;
+
+   /* Object lifetime. */
+   base->context_create = swr_create_context;
+   base->destroy = swr_destroy_screen;
+
+   /* Resources and presentation. */
+   base->can_create_resource = swr_can_create_resource;
+   base->resource_create = swr_resource_create;
+   base->resource_destroy = swr_resource_destroy;
+   base->flush_frontbuffer = swr_flush_frontbuffer;
+
+   screen->hJitMgr = JitCreateContext(KNOB_SIMD_WIDTH, KNOB_ARCH_STR);
+
+   swr_fence_init(base);
+
+   return base;
+}
+
+/* Return the software winsys the given SWR screen was created with. */
+struct sw_winsys *
+swr_get_winsys(struct pipe_screen *pipe)
+{
+   struct swr_screen *screen = (struct swr_screen *)pipe;
+
+   return screen->winsys;
+}
+
+/* Return the display target stored in the given SWR resource. */
+struct sw_displaytarget *
+swr_get_displaytarget(struct pipe_resource *resource)
+{
+   struct swr_resource *res = (struct swr_resource *)resource;
+
+   return res->display_target;
+}
diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_screen.h
new file mode 100644 (file)
index 0000000..a96dc44
--- /dev/null
@@ -0,0 +1,52 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_SCREEN_H
+#define SWR_SCREEN_H
+
+#include "pipe/p_screen.h"
+#include "pipe/p_defines.h"
+#include "api.h"
+
+struct sw_winsys;
+
+/*
+ * SWR implementation of a gallium screen.  `base` must stay the first
+ * member: swr_screen() converts a pipe_screen pointer by plain cast.
+ */
+struct swr_screen {
+   struct pipe_screen base;
+
+   /* Fence waited on before the front buffer is read and at teardown. */
+   struct pipe_fence_handle *flush_fence;
+
+   /* Software winsys used for display targets. */
+   struct sw_winsys *winsys;
+
+   /* Handle returned by JitCreateContext; released with JitDestroyContext. */
+   HANDLE hJitMgr;
+};
+
+/* Downcast a generic pipe_screen pointer to the SWR screen it belongs to. */
+static INLINE struct swr_screen *
+swr_screen(struct pipe_screen *pipe)
+{
+   struct swr_screen *screen = (struct swr_screen *)pipe;
+   return screen;
+}
+
+/*
+ * Translate a gallium pipe_format to the corresponding SWR_FORMAT.
+ * Returns (SWR_FORMAT)-1 when the format has no SWR equivalent.
+ */
+SWR_FORMAT
+mesa_to_swr_format(enum pipe_format format);
+
+#endif
diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
new file mode 100644 (file)
index 0000000..ff16d0f
--- /dev/null
@@ -0,0 +1,591 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "JitManager.h"
+#include "state.h"
+#include "state_llvm.h"
+#include "builder.h"
+
+#include "llvm-c/Core.h"
+#include "llvm/Support/CBindingWrapping.h"
+
+#include "tgsi/tgsi_strings.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_struct.h"
+#include "gallivm/lp_bld_tgsi.h"
+
+#include "swr_context.h"
+#include "swr_context_llvm.h"
+#include "swr_state.h"
+#include "swr_screen.h"
+
+/* Two JIT keys are equal when their object representations match bytewise. */
+bool operator==(const swr_jit_key &lhs, const swr_jit_key &rhs)
+{
+   const int diff = memcmp(&lhs, &rhs, sizeof(lhs));
+   return diff == 0;
+}
+
+/*
+ * Fill in the JIT key describing the state a fragment shader variant
+ * depends on: colour buffer count, two-sided lighting, the bound vertex
+ * shader's output semantics, and the static sampler / texture state of
+ * every sampler slot the shader declares.
+ */
+void
+swr_generate_fs_key(struct swr_jit_key &key,
+                    struct swr_context *ctx,
+                    swr_fragment_shader *swr_fs)
+{
+   key.nr_cbufs = ctx->framebuffer.nr_cbufs;
+   key.light_twoside = ctx->rasterizer->light_twoside;
+
+   memcpy(&key.vs_output_semantic_name,
+          &ctx->vs->info.base.output_semantic_name,
+          sizeof(key.vs_output_semantic_name));
+   memcpy(&key.vs_output_semantic_idx,
+          &ctx->vs->info.base.output_semantic_index,
+          sizeof(key.vs_output_semantic_idx));
+
+   key.nr_samplers = swr_fs->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
+
+   for (unsigned slot = 0; slot < key.nr_samplers; slot++) {
+      if (!(swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << slot)))
+         continue;
+
+      lp_sampler_static_sampler_state(
+         &key.sampler[slot].sampler_state,
+         ctx->samplers[PIPE_SHADER_FRAGMENT][slot]);
+   }
+
+   /*
+    * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
+    * are dx10-style? Can't really have mixed opcodes, at least not
+    * if we want to skip the holes here (without rescanning tgsi).
+    *
+    * Without declared sampler views, the sampler declarations decide
+    * which texture slots are captured.
+    */
+   unsigned view_file;
+   if (swr_fs->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
+      view_file = TGSI_FILE_SAMPLER_VIEW;
+      key.nr_sampler_views =
+         swr_fs->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
+   } else {
+      view_file = TGSI_FILE_SAMPLER;
+      key.nr_sampler_views = key.nr_samplers;
+   }
+
+   for (unsigned slot = 0; slot < key.nr_sampler_views; slot++) {
+      if (!(swr_fs->info.base.file_mask[view_file] & (1 << slot)))
+         continue;
+
+      lp_sampler_static_texture_state(
+         &key.sampler[slot].texture_state,
+         ctx->sampler_views[PIPE_SHADER_FRAGMENT][slot]);
+   }
+}
+
+/*
+ * Builder specialization used to compile gallium shaders.  Constructing one
+ * asks the JIT manager to set up a new module; CompileVS / CompileFS then
+ * emit one function into that module and return its jitted entry point.
+ */
+struct BuilderSWR : public Builder {
+   BuilderSWR(JitManager *pJitMgr)
+      : Builder(pJitMgr)
+   {
+      pJitMgr->SetupNewModule();
+   }
+
+   // Compile the given vertex shader; also updates swr_vs->linkageMask.
+   PFN_VERTEX_FUNC
+   CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs);
+   // Compile the fragment shader currently bound to ctx for the given key.
+   PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_key &key);
+};
+
+/*
+ * Build and JIT-compile the LLVM function for a vertex shader.
+ *
+ * Steps:
+ *  1. compute swr_vs->linkageMask (one bit per output that is not POSITION);
+ *  2. create a "VS" function taking (swr_draw_context *, SWR_VS_CONTEXT *);
+ *  3. load the used input channels from the VS context's pVin;
+ *  4. translate the TGSI tokens with lp_build_tgsi_soa;
+ *  5. store every produced output channel to the VS context's pVout;
+ *  6. verify, compile and return the jitted entry point.
+ *
+ * The ctx parameter is not used in the body.
+ */
+PFN_VERTEX_FUNC
+BuilderSWR::CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs)
+{
+   swr_vs->linkageMask = 0;
+
+   // Every output except the position takes part in VS->FS linkage.
+   for (unsigned i = 0; i < swr_vs->info.base.num_outputs; i++) {
+      switch (swr_vs->info.base.output_semantic_name[i]) {
+      case TGSI_SEMANTIC_POSITION:
+         break;
+      default:
+         swr_vs->linkageMask |= (1 << i);
+         break;
+      }
+   }
+
+   //   tgsi_dump(swr_vs->pipe.tokens, 0);
+
+   // Have gallivm emit into the JIT manager's current module.
+   struct gallivm_state *gallivm =
+      gallivm_create("VS", wrap(&JM()->mContext));
+   gallivm->module = wrap(JM()->mpCurrentModule);
+
+   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
+   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
+
+   // Zeroed so that untouched output channels can be skipped below.
+   memset(outputs, 0, sizeof(outputs));
+
+   AttrBuilder attrBuilder;
+   attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
+   AttributeSet attrSet = AttributeSet::get(
+      JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
+
+   std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
+                              PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
+   FunctionType *vsFuncType =
+      FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false);
+
+   // create new vertex shader function
+   auto pFunction = Function::Create(vsFuncType,
+                                     GlobalValue::ExternalLinkage,
+                                     "VS",
+                                     JM()->mpCurrentModule);
+   pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
+
+   // Point both the SWR builder and the gallivm builder at the entry block.
+   BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
+   IRB()->SetInsertPoint(block);
+   LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
+
+   auto argitr = pFunction->arg_begin();
+   Value *hPrivateData = &*argitr++;
+   hPrivateData->setName("hPrivateData");
+   Value *pVsCtx = &*argitr++;
+   pVsCtx->setName("vsCtx");
+   
+   Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)});
+
+   consts_ptr->setName("vs_constants");
+   Value *const_sizes_ptr =
+      GEP(hPrivateData, {0, swr_draw_context_num_constantsVS});
+   const_sizes_ptr->setName("num_vs_constants");
+
+   Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin});
+
+   // Only channels flagged in input_usage_mask are loaded; the remaining
+   // entries of inputs[][] are left uninitialized.
+   for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
+      const unsigned mask = swr_vs->info.base.input_usage_mask[attrib];
+      for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
+         if (mask & (1 << channel)) {
+            inputs[attrib][channel] =
+               wrap(LOAD(vtxInput, {0, 0, attrib, channel}));
+         }
+      }
+   }
+
+   struct lp_bld_tgsi_system_values system_values;
+   memset(&system_values, 0, sizeof(system_values));
+   system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
+   system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID}));
+
+   lp_build_tgsi_soa(gallivm,
+                     swr_vs->pipe.tokens,
+                     lp_type_float_vec(32, 32 * 8),
+                     NULL, // mask
+                     wrap(consts_ptr),
+                     wrap(const_sizes_ptr),
+                     &system_values,
+                     inputs,
+                     outputs,
+                     NULL, // wrap(hPrivateData), (sampler context)
+                     NULL, // thread data
+                     NULL, // sampler
+                     &swr_vs->info.base,
+                     NULL); // geometry shader face
+
+   // Resume emitting at whatever block the gallivm builder ended up in.
+   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
+
+   Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout});
+
+   for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
+      for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
+         if (!outputs[attrib][channel])
+            continue;
+
+         Value *val = LOAD(unwrap(outputs[attrib][channel]));
+
+         // Point size is written to its dedicated slot instead of the
+         // slot matching its output index.
+         uint32_t outSlot = attrib;
+         if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE)
+            outSlot = VERTEX_POINT_SIZE_SLOT;
+         STORE(val, vtxOutput, {0, 0, outSlot, channel});
+      }
+   }
+
+   RET_VOID();
+
+   gallivm_verify_function(gallivm, wrap(pFunction));
+   gallivm_compile_module(gallivm);
+
+   //   lp_debug_dump_value(func);
+
+   PFN_VERTEX_FUNC pFunc =
+      (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
+
+   debug_printf("vert shader  %p\n", pFunc);
+   assert(pFunc && "Error: VertShader = NULL");
+
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR >= 5)
+   JM()->mIsModuleFinalized = true;
+#endif
+
+   return pFunc;
+}
+
+/*
+ * Compile a vertex shader using the JIT manager owned by the context's
+ * screen and return the jitted entry point.
+ */
+PFN_VERTEX_FUNC
+swr_compile_vs(struct pipe_context *ctx, swr_vertex_shader *swr_vs)
+{
+   JitManager *jit_mgr =
+      reinterpret_cast<JitManager *>(swr_screen(ctx->screen)->hJitMgr);
+   BuilderSWR builder(jit_mgr);
+
+   return builder.CompileVS(ctx, swr_vs);
+}
+
+/*
+ * Return the index of the first shader output with the given semantic
+ * name and index, or -1 when no output matches.
+ */
+static int
+find_output_semantic(const struct tgsi_shader_info *info,
+                     unsigned name,
+                     ubyte index)
+{
+   for (int slot = 0; slot < PIPE_MAX_SHADER_OUTPUTS; slot++) {
+      if (info->output_semantic_name[slot] != name)
+         continue;
+      if (info->output_semantic_index[slot] == index)
+         return slot;
+   }
+
+   return -1;
+}
+
+/*
+ * Map a semantic name/index pair to the linkage slot of the matching output
+ * in info.  The slot is the output index minus one because the position is
+ * not part of the linkage.  A COLOR lookup that finds no COLOR output falls
+ * back to the BCOLOR output with the same index.
+ *
+ * Returns 0xFFFFFFFF when nothing matches.
+ */
+static unsigned
+locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
+{
+   int slot = find_output_semantic(info, name, index);
+   if (slot >= 0)
+      return slot - 1; // position is not part of the linkage
+
+   if (name == TGSI_SEMANTIC_COLOR) { // BCOLOR fallback
+      slot = find_output_semantic(info, TGSI_SEMANTIC_BCOLOR, index);
+      if (slot >= 0)
+         return slot - 1; // position is not part of the linkage
+   }
+
+   return 0xFFFFFFFF;
+}
+
+/*
+ * Build and JIT-compile the LLVM function for the fragment shader bound to
+ * ctx (ctx->fs), specialized for the given key.
+ *
+ * Steps:
+ *  1. create an "FS" function taking (swr_draw_context *, SWR_PS_CONTEXT *);
+ *  2. for each used input, either synthesize it (FACE, POSITION, PRIMID) or
+ *     interpolate the linked vertex shader output, recording constant and
+ *     point-sprite attributes in swr_fs->constantMask / pointSpriteMask;
+ *  3. translate the TGSI tokens with lp_build_tgsi_soa;
+ *  4. store the depth and colour outputs, and the kill mask when the shader
+ *     uses kill, back into the PS context;
+ *  5. verify, compile and return the jitted entry point.
+ */
+PFN_PIXEL_KERNEL
+BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_key &key)
+{
+   struct swr_fragment_shader *swr_fs = ctx->fs;
+
+   //   tgsi_dump(swr_fs->pipe.tokens, 0);
+
+   // Have gallivm emit into the JIT manager's current module.
+   struct gallivm_state *gallivm =
+      gallivm_create("FS", wrap(&JM()->mContext));
+   gallivm->module = wrap(JM()->mpCurrentModule);
+
+   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
+   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
+
+   memset(inputs, 0, sizeof(inputs));
+   memset(outputs, 0, sizeof(outputs));
+
+   struct lp_build_sampler_soa *sampler = NULL;
+
+   AttrBuilder attrBuilder;
+   attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
+   AttributeSet attrSet = AttributeSet::get(
+      JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
+
+   std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
+                              PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
+   FunctionType *funcType =
+      FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false);
+
+   auto pFunction = Function::Create(funcType,
+                                     GlobalValue::ExternalLinkage,
+                                     "FS",
+                                     JM()->mpCurrentModule);
+   pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
+
+   // Point both the SWR builder and the gallivm builder at the entry block.
+   BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
+   IRB()->SetInsertPoint(block);
+   LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
+
+   auto args = pFunction->arg_begin();
+   Value *hPrivateData = &*args++;
+   hPrivateData->setName("hPrivateData");
+   Value *pPS = &*args++;
+   pPS->setName("psCtx");
+
+   Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS});
+   consts_ptr->setName("fs_constants");
+   Value *const_sizes_ptr =
+      GEP(hPrivateData, {0, swr_draw_context_num_constantsFS});
+   const_sizes_ptr->setName("num_fs_constants");
+
+   // xxx should check for flat shading versus interpolation
+
+
+   // load *pAttribs, *pPerspAttribs
+   Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs");
+   Value *pPerspAttribs =
+      LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs");
+
+   swr_fs->constantMask = 0;
+   swr_fs->pointSpriteMask = 0;
+
+   for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
+      const unsigned mask = swr_fs->info.base.input_usage_mask[attrib];
+      const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib];
+      const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib];
+
+      // Inputs with no used channel need no setup.
+      if (!mask)
+         continue;
+
+      // load i,j
+      // NOTE(review): vi/vj stay null if interpLoc matches none of the
+      // cases below.
+      Value *vi = nullptr, *vj = nullptr;
+      switch (interpLoc) {
+      case TGSI_INTERPOLATE_LOC_CENTER:
+         vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i");
+         vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j");
+         break;
+      case TGSI_INTERPOLATE_LOC_CENTROID:
+         vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i");
+         vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j");
+         break;
+      case TGSI_INTERPOLATE_LOC_SAMPLE:
+         vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i");
+         vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j");
+         break;
+      }
+
+      // load/compute w
+      // Perspective inputs read from pPerspAttribs and use w = 1/(1/w);
+      // all other modes read the raw attributes with w = 1.
+      Value *vw = nullptr, *pAttribs;
+      if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE) {
+         pAttribs = pPerspAttribs;
+         switch (interpLoc) {
+         case TGSI_INTERPOLATE_LOC_CENTER:
+            vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}));
+            break;
+         case TGSI_INTERPOLATE_LOC_CENTROID:
+            vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid}));
+            break;
+         case TGSI_INTERPOLATE_LOC_SAMPLE:
+            vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample}));
+            break;
+         }
+      } else {
+         pAttribs = pRawAttribs;
+         vw = VIMMED1(1.f);
+      }
+
+      vw->setName("w");
+
+      ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib];
+      ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib];
+
+      // FACE, POSITION and PRIMID are read from the PS context rather than
+      // interpolated from vertex shader outputs.
+      if (semantic_name == TGSI_SEMANTIC_FACE) {
+         // Remap the front-face flag from {0, 1} to {-1, +1}.
+         Value *ff =
+            UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty);
+         ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f));
+         ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace");
+
+         inputs[attrib][0] = wrap(ff);
+         inputs[attrib][1] = wrap(VIMMED1(0.0f));
+         inputs[attrib][2] = wrap(VIMMED1(0.0f));
+         inputs[attrib][3] = wrap(VIMMED1(1.0f));
+         continue;
+      } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord
+         inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX"));
+         inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY"));
+         inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ"));
+         inputs[attrib][3] =
+            wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW"));
+         continue;
+      } else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
+         Value *primID = LOAD(pPS, {0, SWR_PS_CONTEXT_primID}, "primID");
+         inputs[attrib][0] = wrap(VECTOR_SPLAT(JM()->mVWidth, primID));
+         inputs[attrib][1] = wrap(VIMMED1(0));
+         inputs[attrib][2] = wrap(VIMMED1(0));
+         inputs[attrib][3] = wrap(VIMMED1(0));
+         continue;
+      }
+
+      // Find the vertex shader output feeding this input.
+      unsigned linkedAttrib =
+         locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
+      if (linkedAttrib == 0xFFFFFFFF) {
+         // not found - check for point sprite
+         if (ctx->rasterizer->sprite_coord_enable) {
+            linkedAttrib = ctx->vs->info.base.num_outputs - 1;
+            swr_fs->pointSpriteMask |= (1 << linkedAttrib);
+         } else {
+            fprintf(stderr,
+                    "Missing %s[%d]\n",
+                    tgsi_semantic_names[semantic_name],
+                    semantic_idx);
+            assert(0 && "attribute linkage not found");
+         }
+      }
+
+      if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
+         swr_fs->constantMask |= 1 << linkedAttrib;
+      }
+
+      for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
+         if (mask & (1 << channel)) {
+            // Each linked attribute occupies 12 consecutive values: three
+            // groups (A, B, C) of four channels each.
+            Value *indexA = C(linkedAttrib * 12 + channel);
+            Value *indexB = C(linkedAttrib * 12 + channel + 4);
+            Value *indexC = C(linkedAttrib * 12 + channel + 8);
+
+            if ((semantic_name == TGSI_SEMANTIC_COLOR)
+                && ctx->rasterizer->light_twoside) {
+               unsigned bcolorAttrib = locate_linkage(
+                  TGSI_SEMANTIC_BCOLOR, semantic_idx, &ctx->vs->info.base);
+
+               // Distance from the front colour to the back colour; added
+               // at run time only when the primitive is back-facing.
+               unsigned diff = 12 * (bcolorAttrib - linkedAttrib);
+
+               Value *back =
+                  XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace");
+
+               Value *offset = MUL(back, C(diff));
+               offset->setName("offset");
+
+               indexA = ADD(indexA, offset);
+               indexB = ADD(indexB, offset);
+               indexC = ADD(indexC, offset);
+
+               if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
+                  swr_fs->constantMask |= 1 << bcolorAttrib;
+               }
+            }
+
+            Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA)));
+            Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB)));
+            Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC)));
+
+            if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
+               inputs[attrib][channel] = wrap(va);
+            } else {
+               // interp = va * i + vb * j + vc * (1 - i - j), scaled by w
+               // for perspective-correct inputs.
+               Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj);
+
+               vc = FMUL(vk, vc);
+
+               Value *interp = FMUL(va, vi);
+               Value *interp1 = FMUL(vb, vj);
+               interp = FADD(interp, interp1);
+               interp = FADD(interp, vc);
+               if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE)
+                  interp = FMUL(interp, vw);
+               inputs[attrib][channel] = wrap(interp);
+            }
+         }
+      }
+   }
+
+   sampler = swr_sampler_soa_create(key.sampler);
+
+   struct lp_bld_tgsi_system_values system_values;
+   memset(&system_values, 0, sizeof(system_values));
+
+   struct lp_build_mask_context mask;
+
+   // The execution mask is only set up for shaders that can kill pixels.
+   if (swr_fs->info.base.uses_kill) {
+      Value *mask_val = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask");
+      lp_build_mask_begin(
+         &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(mask_val));
+   }
+
+   lp_build_tgsi_soa(gallivm,
+                     swr_fs->pipe.tokens,
+                     lp_type_float_vec(32, 32 * 8),
+                     swr_fs->info.base.uses_kill ? &mask : NULL, // mask
+                     wrap(consts_ptr),
+                     wrap(const_sizes_ptr),
+                     &system_values,
+                     inputs,
+                     outputs,
+                     wrap(hPrivateData),
+                     NULL, // thread data
+                     sampler, // sampler
+                     &swr_fs->info.base,
+                     NULL); // geometry shader face
+
+   // Resume emitting at whatever block the gallivm builder ended up in.
+   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
+
+   for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs;
+        attrib++) {
+      switch (swr_fs->info.base.output_semantic_name[attrib]) {
+      case TGSI_SEMANTIC_POSITION: {
+         // write z
+         LLVMValueRef outZ =
+            LLVMBuildLoad(gallivm->builder, outputs[attrib][2], "");
+         STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ});
+         break;
+      }
+      case TGSI_SEMANTIC_COLOR: {
+         for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
+            if (!outputs[attrib][channel])
+               continue;
+
+            LLVMValueRef out =
+               LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], "");
+            // Either replicate the colour to every bound colour buffer or
+            // write it to the render target named by its semantic index.
+            if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
+               for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) {
+                  STORE(unwrap(out),
+                        pPS,
+                        {0, SWR_PS_CONTEXT_shaded, rt, channel});
+               }
+            } else {
+               STORE(unwrap(out),
+                     pPS,
+                     {0,
+                           SWR_PS_CONTEXT_shaded,
+                           swr_fs->info.base.output_semantic_index[attrib],
+                           channel});
+            }
+         }
+         break;
+      }
+      default: {
+         fprintf(stderr,
+                 "unknown output from FS %s[%d]\n",
+                 tgsi_semantic_names[swr_fs->info.base
+                                        .output_semantic_name[attrib]],
+                 swr_fs->info.base.output_semantic_index[attrib]);
+         break;
+      }
+      }
+   }
+
+   LLVMValueRef mask_result = 0;
+   if (swr_fs->info.base.uses_kill) {
+      mask_result = lp_build_mask_end(&mask);
+   }
+
+   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
+
+   // Write the surviving-pixel mask back to the PS context.
+   if (swr_fs->info.base.uses_kill) {
+      STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask});
+   }
+
+   RET_VOID();
+
+   gallivm_verify_function(gallivm, wrap(pFunction));
+
+   gallivm_compile_module(gallivm);
+
+   PFN_PIXEL_KERNEL kernel =
+      (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction));
+   debug_printf("frag shader  %p\n", kernel);
+   assert(kernel && "Error: FragShader = NULL");
+
+#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR >= 5)
+   JM()->mIsModuleFinalized = true;
+#endif
+
+   return kernel;
+}
+
+/*
+ * Compile the fragment shader bound to ctx for the given key, using the
+ * JIT manager owned by the context's screen, and return the jitted kernel.
+ */
+PFN_PIXEL_KERNEL
+swr_compile_fs(struct swr_context *ctx, swr_jit_key &key)
+{
+   JitManager *jit_mgr =
+      reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr);
+   BuilderSWR builder(jit_mgr);
+
+   return builder.CompileFS(ctx, key);
+}
diff --git a/src/gallium/drivers/swr/swr_shader.h b/src/gallium/drivers/swr/swr_shader.h
new file mode 100644 (file)
index 0000000..e22a7c4
--- /dev/null
@@ -0,0 +1,60 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#pragma once
+
+/* Forward declarations.  These use the `struct` tag to agree with how the
+ * types are named everywhere else in the driver (swr_jit_key is defined as
+ * a struct in this header).  A class/struct tag mismatch triggers
+ * -Wmismatched-tags and affects name mangling under the MSVC ABI. */
+struct swr_vertex_shader;
+struct swr_fragment_shader;
+struct swr_jit_key;
+
+/* Compile the vertex shader \p swr_vs and return its entry point. */
+PFN_VERTEX_FUNC
+swr_compile_vs(struct pipe_context *ctx, swr_vertex_shader *swr_vs);
+
+/* Compile the fragment shader variant selected by \p key for \p ctx. */
+PFN_PIXEL_KERNEL
+swr_compile_fs(struct swr_context *ctx, swr_jit_key &key);
+
+/* Fill \p key from the current state of \p ctx and shader \p swr_fs.
+ * Callers zero the key first (it is hashed as raw bytes). */
+void swr_generate_fs_key(struct swr_jit_key &key,
+                         struct swr_context *ctx,
+                         swr_fragment_shader *swr_fs);
+
+/*
+ * Key identifying one compiled fragment-shader variant.
+ *
+ * std::hash<swr_jit_key> hashes the object as raw bytes, so the whole
+ * structure (padding included) has to be zero-filled before the fields are
+ * populated; swr_update_derived memset()s it before calling
+ * swr_generate_fs_key.
+ */
+struct swr_jit_key {
+   /* Number of colour buffers; bounds the per-render-target store loop
+    * in CompileFS. */
+   unsigned nr_cbufs;
+   /* presumably mirrors the rasterizer's two-sided lighting flag --
+    * TODO confirm against swr_generate_fs_key */
+   unsigned light_twoside;
+   /* presumably the bound vertex shader's output semantics, used to match
+    * FS inputs -- TODO confirm */
+   ubyte vs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
+   ubyte vs_output_semantic_idx[PIPE_MAX_SHADER_OUTPUTS];
+   unsigned nr_samplers;
+   unsigned nr_sampler_views;
+   /* Static (compile-time) sampler/texture state, one entry per slot. */
+   struct swr_sampler_static_state sampler[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+};
+
+namespace std
+{
+/* std::hash specialisation so swr_jit_key can be used as the key of
+ * unordered containers. */
+template <> struct hash<swr_jit_key> {
+   std::size_t operator()(const swr_jit_key &key) const
+   {
+      /* CRC32 over the raw object bytes, padding included. */
+      return util_hash_crc32(&key, sizeof(key));
+   }
+};
+}
+
+/* Equality for swr_jit_key; pairs with the std::hash specialisation above
+ * so the key can be looked up in hashed containers.  Defined elsewhere. */
+bool operator==(const swr_jit_key &lhs, const swr_jit_key &rhs);
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
new file mode 100644 (file)
index 0000000..49035b5
--- /dev/null
@@ -0,0 +1,1370 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "common/os.h"
+#include "jit_api.h"
+#include "JitManager.h"
+#include "state_llvm.h"
+
+#include "gallivm/lp_bld_tgsi.h"
+#include "util/u_format.h"
+
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_helpers.h"
+#include "util/u_framebuffer.h"
+
+#include "swr_state.h"
+#include "swr_context.h"
+#include "swr_context_llvm.h"
+#include "swr_screen.h"
+#include "swr_resource.h"
+#include "swr_tex_sample.h"
+#include "swr_scratch.h"
+#include "swr_shader.h"
+
+#include <new>
+
+/* These should be pulled out into separate files as necessary
+ * Just initializing everything here to get going. */
+
+static void *
+swr_create_blend_state(struct pipe_context *pipe,
+                       const struct pipe_blend_state *blend)
+{
+   struct swr_blend_state *state = CALLOC_STRUCT(swr_blend_state);
+
+   memcpy(&state->pipe, blend, sizeof(*blend));
+
+   struct pipe_blend_state *pipe_blend = &state->pipe;
+
+   for (int target = 0;
+        target < std::min(SWR_NUM_RENDERTARGETS, PIPE_MAX_COLOR_BUFS);
+        target++) {
+
+      struct pipe_rt_blend_state *rt_blend = &pipe_blend->rt[target];
+      SWR_RENDER_TARGET_BLEND_STATE &blendState =
+         state->blendState.renderTarget[target];
+      RENDER_TARGET_BLEND_COMPILE_STATE &compileState =
+         state->compileState[target];
+
+      if (target != 0 && !pipe_blend->independent_blend_enable) {
+         memcpy(&compileState,
+                &state->compileState[0],
+                sizeof(RENDER_TARGET_BLEND_COMPILE_STATE));
+         continue;
+      }
+
+      compileState.blendEnable = rt_blend->blend_enable;
+      if (compileState.blendEnable) {
+         compileState.sourceAlphaBlendFactor =
+            swr_convert_blend_factor(rt_blend->alpha_src_factor);
+         compileState.destAlphaBlendFactor =
+            swr_convert_blend_factor(rt_blend->alpha_dst_factor);
+         compileState.sourceBlendFactor =
+            swr_convert_blend_factor(rt_blend->rgb_src_factor);
+         compileState.destBlendFactor =
+            swr_convert_blend_factor(rt_blend->rgb_dst_factor);
+
+         compileState.colorBlendFunc =
+            swr_convert_blend_func(rt_blend->rgb_func);
+         compileState.alphaBlendFunc =
+            swr_convert_blend_func(rt_blend->alpha_func);
+      }
+      compileState.logicOpEnable = state->pipe.logicop_enable;
+      if (compileState.logicOpEnable) {
+         compileState.logicOpFunc =
+            swr_convert_logic_op(state->pipe.logicop_func);
+      }
+
+      blendState.writeDisableRed =
+         (rt_blend->colormask & PIPE_MASK_R) ? 0 : 1;
+      blendState.writeDisableGreen =
+         (rt_blend->colormask & PIPE_MASK_G) ? 0 : 1;
+      blendState.writeDisableBlue =
+         (rt_blend->colormask & PIPE_MASK_B) ? 0 : 1;
+      blendState.writeDisableAlpha =
+         (rt_blend->colormask & PIPE_MASK_A) ? 0 : 1;
+
+      if (rt_blend->colormask == 0)
+         compileState.blendEnable = false;
+   }
+
+   return state;
+}
+
+/**
+ * Make \p blend the current blend CSO.  Re-binding the object that is
+ * already current is a no-op; otherwise SWR_NEW_BLEND is flagged.
+ */
+static void
+swr_bind_blend_state(struct pipe_context *pipe, void *blend)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   if (ctx->blend != blend) {
+      ctx->blend = (swr_blend_state *)blend;
+      ctx->dirty |= SWR_NEW_BLEND;
+   }
+}
+
+/* Release a blend CSO previously returned by swr_create_blend_state. */
+static void
+swr_delete_blend_state(struct pipe_context *pipe, void *blend)
+{
+   FREE(blend);
+}
+
+/* Store the constant blend colour by value and flag the blend state dirty. */
+static void
+swr_set_blend_color(struct pipe_context *pipe,
+                    const struct pipe_blend_color *color)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   ctx->blend_color = *color;
+
+   ctx->dirty |= SWR_NEW_BLEND;
+}
+
+/* Store the stencil reference values by value and flag the
+ * depth/stencil/alpha state dirty. */
+static void
+swr_set_stencil_ref(struct pipe_context *pipe,
+                    const struct pipe_stencil_ref *ref)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   ctx->stencil_ref = *ref;
+
+   ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
+}
+
+/**
+ * Create a depth/stencil/alpha CSO.  The driver keeps no derived data
+ * here: the CSO is simply a mem_dup() copy of the gallium state.
+ */
+static void *
+swr_create_depth_stencil_state(
+   struct pipe_context *pipe,
+   const struct pipe_depth_stencil_alpha_state *depth_stencil)
+{
+   return (pipe_depth_stencil_alpha_state *)mem_dup(depth_stencil,
+                                                    sizeof *depth_stencil);
+}
+
+/**
+ * Make \p depth_stencil the current depth/stencil/alpha CSO.  Nothing
+ * happens when it is already bound; otherwise
+ * SWR_NEW_DEPTH_STENCIL_ALPHA is flagged.
+ */
+static void
+swr_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   pipe_depth_stencil_alpha_state *dsa =
+      (pipe_depth_stencil_alpha_state *)depth_stencil;
+
+   if (ctx->depth_stencil != dsa) {
+      ctx->depth_stencil = dsa;
+      ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
+   }
+}
+
+/* Release a CSO previously returned by swr_create_depth_stencil_state. */
+static void
+swr_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
+{
+   FREE(depth);
+}
+
+
+/**
+ * Create a rasterizer CSO: a mem_dup() copy of the gallium state.  The
+ * translation to SWR state happens later, in swr_update_derived.
+ */
+static void *
+swr_create_rasterizer_state(struct pipe_context *pipe,
+                            const struct pipe_rasterizer_state *rast)
+{
+   return (pipe_rasterizer_state *)mem_dup(rast, sizeof *rast);
+}
+
+/**
+ * Make \p handle the current rasterizer CSO.  Re-binding the current
+ * object is a no-op; otherwise SWR_NEW_RASTERIZER is flagged.
+ */
+static void
+swr_bind_rasterizer_state(struct pipe_context *pipe, void *handle)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   pipe_rasterizer_state *rast = (pipe_rasterizer_state *)handle;
+
+   if (ctx->rasterizer != rast) {
+      ctx->rasterizer = rast;
+      ctx->dirty |= SWR_NEW_RASTERIZER;
+   }
+}
+
+/* Release a CSO previously returned by swr_create_rasterizer_state. */
+static void
+swr_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer)
+{
+   FREE(rasterizer);
+}
+
+
+/**
+ * Create a sampler CSO: a mem_dup() copy of the gallium sampler state.
+ */
+static void *
+swr_create_sampler_state(struct pipe_context *pipe,
+                         const struct pipe_sampler_state *sampler)
+{
+   return (pipe_sampler_state *)mem_dup(sampler, sizeof *sampler);
+}
+
+static void
+swr_bind_sampler_states(struct pipe_context *pipe,
+                        unsigned shader,
+                        unsigned start,
+                        unsigned num,
+                        void **samplers)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   unsigned i;
+
+   assert(shader < PIPE_SHADER_TYPES);
+   assert(start + num <= Elements(ctx->samplers[shader]));
+
+   /* set the new samplers */
+   ctx->num_samplers[shader] = num;
+   for (i = 0; i < num; i++) {
+      ctx->samplers[shader][start + i] = (pipe_sampler_state *)samplers[i];
+   }
+
+   ctx->dirty |= SWR_NEW_SAMPLER;
+}
+
+/* Release a CSO previously returned by swr_create_sampler_state. */
+static void
+swr_delete_sampler_state(struct pipe_context *pipe, void *sampler)
+{
+   FREE(sampler);
+}
+
+
+static struct pipe_sampler_view *
+swr_create_sampler_view(struct pipe_context *pipe,
+                        struct pipe_resource *texture,
+                        const struct pipe_sampler_view *templ)
+{
+   struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
+
+   if (view) {
+      *view = *templ;
+      view->reference.count = 1;
+      view->texture = NULL;
+      pipe_resource_reference(&view->texture, texture);
+      view->context = pipe;
+   }
+
+   return view;
+}
+
+static void
+swr_set_sampler_views(struct pipe_context *pipe,
+                      unsigned shader,
+                      unsigned start,
+                      unsigned num,
+                      struct pipe_sampler_view **views)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   uint i;
+
+   assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
+
+   assert(shader < PIPE_SHADER_TYPES);
+   assert(start + num <= Elements(ctx->sampler_views[shader]));
+
+   /* set the new sampler views */
+   ctx->num_sampler_views[shader] = num;
+   for (i = 0; i < num; i++) {
+      /* Note: we're using pipe_sampler_view_release() here to work around
+       * a possible crash when the old view belongs to another context that
+       * was already destroyed.
+       */
+      pipe_sampler_view_release(pipe, &ctx->sampler_views[shader][start + i]);
+      pipe_sampler_view_reference(&ctx->sampler_views[shader][start + i],
+                                  views[i]);
+   }
+
+   ctx->dirty |= SWR_NEW_SAMPLER_VIEW;
+}
+
+/* Destroy a sampler view: drop its texture reference, then free it. */
+static void
+swr_sampler_view_destroy(struct pipe_context *pipe,
+                         struct pipe_sampler_view *view)
+{
+   pipe_resource_reference(&view->texture, NULL);
+   FREE(view);
+}
+
+static void *
+swr_create_vs_state(struct pipe_context *pipe,
+                    const struct pipe_shader_state *vs)
+{
+   struct swr_vertex_shader *swr_vs =
+      (swr_vertex_shader *)CALLOC_STRUCT(swr_vertex_shader);
+   if (!swr_vs)
+      return NULL;
+
+   swr_vs->pipe.tokens = tgsi_dup_tokens(vs->tokens);
+   swr_vs->pipe.stream_output = vs->stream_output;
+
+   lp_build_tgsi_info(vs->tokens, &swr_vs->info);
+
+   swr_vs->func = swr_compile_vs(pipe, swr_vs);
+
+   swr_vs->soState = {0};
+
+   if (swr_vs->pipe.stream_output.num_outputs) {
+      pipe_stream_output_info *stream_output = &swr_vs->pipe.stream_output;
+
+      swr_vs->soState.soEnable = true;
+      // soState.rasterizerDisable set on state dirty
+      // soState.streamToRasterizer not used
+
+      for (uint32_t i = 0; i < stream_output->num_outputs; i++) {
+         swr_vs->soState.streamMasks[stream_output->output[i].stream] |=
+            1 << (stream_output->output[i].register_index - 1);
+      }
+      for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
+        swr_vs->soState.streamNumEntries[i] =
+             _mm_popcnt_u32(swr_vs->soState.streamMasks[i]);
+       }
+   }
+
+   return swr_vs;
+}
+
+/**
+ * Make \p vs the current vertex shader.  Re-binding the current shader is
+ * a no-op; otherwise SWR_NEW_VS is flagged.
+ */
+static void
+swr_bind_vs_state(struct pipe_context *pipe, void *vs)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   if (ctx->vs != vs) {
+      ctx->vs = (swr_vertex_shader *)vs;
+      ctx->dirty |= SWR_NEW_VS;
+   }
+}
+
+/* Destroy a vertex shader CSO: free the duplicated TGSI tokens, then the
+ * shader object itself. */
+static void
+swr_delete_vs_state(struct pipe_context *pipe, void *vs)
+{
+   struct swr_vertex_shader *swr_vs = (swr_vertex_shader *)vs;
+   FREE((void *)swr_vs->pipe.tokens);
+   FREE(vs);
+}
+
+/**
+ * Create a fragment shader CSO.
+ *
+ * Only the TGSI tokens are duplicated and the shader info gathered here;
+ * the variants themselves are compiled on demand by swr_update_derived.
+ *
+ * \return the new swr_fragment_shader, or NULL if an allocation fails
+ */
+static void *
+swr_create_fs_state(struct pipe_context *pipe,
+                    const struct pipe_shader_state *fs)
+{
+   /* Plain `new` reports failure by throwing, never by returning NULL, so
+    * the nothrow form is needed for the check below to mean anything (and
+    * to keep exceptions from crossing this C callback).  The trailing ()
+    * value-initialises the object. */
+   struct swr_fragment_shader *swr_fs =
+      new (std::nothrow) swr_fragment_shader();
+   if (!swr_fs)
+      return NULL;
+
+   swr_fs->pipe.tokens = tgsi_dup_tokens(fs->tokens);
+   if (!swr_fs->pipe.tokens) {
+      delete swr_fs;
+      return NULL;
+   }
+
+   lp_build_tgsi_info(fs->tokens, &swr_fs->info);
+
+   return swr_fs;
+}
+
+
+/**
+ * Make \p fs the current fragment shader.  Re-binding the current shader
+ * is a no-op; otherwise SWR_NEW_FS is flagged.
+ */
+static void
+swr_bind_fs_state(struct pipe_context *pipe, void *fs)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   if (ctx->fs != fs) {
+      ctx->fs = (swr_fragment_shader *)fs;
+      ctx->dirty |= SWR_NEW_FS;
+   }
+}
+
+/* Destroy a fragment shader CSO: free the duplicated TGSI tokens, then
+ * delete the object (it was allocated with `new`, not CALLOC). */
+static void
+swr_delete_fs_state(struct pipe_context *pipe, void *fs)
+{
+   struct swr_fragment_shader *swr_fs = (swr_fragment_shader *)fs;
+   FREE((void *)swr_fs->pipe.tokens);
+   delete swr_fs;
+}
+
+
+static void
+swr_set_constant_buffer(struct pipe_context *pipe,
+                        uint shader,
+                        uint index,
+                        struct pipe_constant_buffer *cb)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   struct pipe_resource *constants = cb ? cb->buffer : NULL;
+
+   assert(shader < PIPE_SHADER_TYPES);
+   assert(index < Elements(ctx->constants[shader]));
+
+   /* note: reference counting */
+   util_copy_constant_buffer(&ctx->constants[shader][index], cb);
+
+   if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) {
+      ctx->dirty |= SWR_NEW_VSCONSTANTS;
+   } else if (shader == PIPE_SHADER_FRAGMENT) {
+      ctx->dirty |= SWR_NEW_FSCONSTANTS;
+   }
+
+   if (cb && cb->user_buffer) {
+      pipe_resource_reference(&constants, NULL);
+   }
+}
+
+
+static void *
+swr_create_vertex_elements_state(struct pipe_context *pipe,
+                                 unsigned num_elements,
+                                 const struct pipe_vertex_element *attribs)
+{
+   struct swr_vertex_element_state *velems;
+   assert(num_elements <= PIPE_MAX_ATTRIBS);
+   velems = CALLOC_STRUCT(swr_vertex_element_state);
+   if (velems) {
+      velems->fsState.numAttribs = num_elements;
+      for (unsigned i = 0; i < num_elements; i++) {
+         // XXX: we should do this keyed on the VS usage info
+
+         const struct util_format_description *desc =
+            util_format_description(attribs[i].src_format);
+
+         velems->fsState.layout[i].AlignedByteOffset = attribs[i].src_offset;
+         velems->fsState.layout[i].Format =
+            mesa_to_swr_format(attribs[i].src_format);
+         velems->fsState.layout[i].StreamIndex =
+            attribs[i].vertex_buffer_index;
+         velems->fsState.layout[i].InstanceEnable =
+            attribs[i].instance_divisor != 0;
+         velems->fsState.layout[i].ComponentControl0 =
+            desc->channel[0].type != UTIL_FORMAT_TYPE_VOID
+            ? ComponentControl::StoreSrc
+            : ComponentControl::Store0;
+         velems->fsState.layout[i].ComponentControl1 =
+            desc->channel[1].type != UTIL_FORMAT_TYPE_VOID
+            ? ComponentControl::StoreSrc
+            : ComponentControl::Store0;
+         velems->fsState.layout[i].ComponentControl2 =
+            desc->channel[2].type != UTIL_FORMAT_TYPE_VOID
+            ? ComponentControl::StoreSrc
+            : ComponentControl::Store0;
+         velems->fsState.layout[i].ComponentControl3 =
+            desc->channel[3].type != UTIL_FORMAT_TYPE_VOID
+            ? ComponentControl::StoreSrc
+            : ComponentControl::Store1Fp;
+         velems->fsState.layout[i].ComponentPacking = ComponentEnable::XYZW;
+         velems->fsState.layout[i].InstanceDataStepRate =
+            attribs[i].instance_divisor;
+
+         /* Calculate the pitch of each stream */
+         const SWR_FORMAT_INFO &swr_desc = GetFormatInfo(
+            mesa_to_swr_format(attribs[i].src_format));
+         velems->stream_pitch[attribs[i].vertex_buffer_index] += swr_desc.Bpp;
+      }
+   }
+
+   return velems;
+}
+
+/**
+ * Make \p velems the current vertex-elements CSO.  SWR_NEW_VERTEX is
+ * always flagged, even when the same object is bound again.
+ */
+static void
+swr_bind_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   ctx->velems = (struct swr_vertex_element_state *)velems;
+   ctx->dirty |= SWR_NEW_VERTEX;
+}
+
+/* Release a CSO previously returned by swr_create_vertex_elements_state.
+ * Only the CSO memory is freed here. */
+static void
+swr_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   /* XXX Need to destroy fetch shader? */
+   FREE(velems);
+}
+
+
+/* Update the context's vertex buffer bindings.  The bookkeeping of
+ * ctx->vertex_buffer and ctx->num_vertex_buffers is delegated to
+ * util_set_vertex_buffers_count; SWR_NEW_VERTEX is then flagged. */
+static void
+swr_set_vertex_buffers(struct pipe_context *pipe,
+                       unsigned start_slot,
+                       unsigned num_elements,
+                       const struct pipe_vertex_buffer *buffers)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   assert(num_elements <= PIPE_MAX_ATTRIBS);
+
+   util_set_vertex_buffers_count(ctx->vertex_buffer,
+                                 &ctx->num_vertex_buffers,
+                                 buffers,
+                                 start_slot,
+                                 num_elements);
+
+   ctx->dirty |= SWR_NEW_VERTEX;
+}
+
+
+/* Set or clear the index buffer binding.  The description is copied
+ * bytewise (no reference is taken on the buffer here); a NULL \p ib
+ * zeroes the binding.  SWR_NEW_VERTEX is flagged either way. */
+static void
+swr_set_index_buffer(struct pipe_context *pipe,
+                     const struct pipe_index_buffer *ib)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   if (ib)
+      memcpy(&ctx->index_buffer, ib, sizeof(ctx->index_buffer));
+   else
+      memset(&ctx->index_buffer, 0, sizeof(ctx->index_buffer));
+
+   ctx->dirty |= SWR_NEW_VERTEX;
+}
+
+/* Store the polygon stipple pattern by value and flag SWR_NEW_STIPPLE. */
+static void
+swr_set_polygon_stipple(struct pipe_context *pipe,
+                        const struct pipe_poly_stipple *stipple)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   ctx->poly_stipple = *stipple; /* struct copy */
+   ctx->dirty |= SWR_NEW_STIPPLE;
+}
+
+/* Store the user clip state by value and flag SWR_NEW_CLIP.  Per the note
+ * below, the stored state is not otherwise implemented yet. */
+static void
+swr_set_clip_state(struct pipe_context *pipe,
+                   const struct pipe_clip_state *clip)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   ctx->clip = *clip;
+   /* XXX Unimplemented, but prevents crash */
+
+   ctx->dirty |= SWR_NEW_CLIP;
+}
+
+
+/* Store the scissor rectangle and flag SWR_NEW_SCISSOR.  Only the first
+ * entry of \p scissor is kept; \p start_slot and \p num_viewports (the
+ * number of entries in \p scissor) are ignored. */
+static void
+swr_set_scissor_states(struct pipe_context *pipe,
+                       unsigned start_slot,
+                       unsigned num_viewports,
+                       const struct pipe_scissor_state *scissor)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   ctx->scissor = *scissor;
+   ctx->dirty |= SWR_NEW_SCISSOR;
+}
+
+/* Store the viewport transform and flag SWR_NEW_VIEWPORT.  Only the first
+ * entry of \p vpt is kept; \p start_slot and \p num_viewports are
+ * ignored. */
+static void
+swr_set_viewport_states(struct pipe_context *pipe,
+                        unsigned start_slot,
+                        unsigned num_viewports,
+                        const struct pipe_viewport_state *vpt)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   ctx->viewport = *vpt;
+   ctx->dirty |= SWR_NEW_VIEWPORT;
+}
+
+
+static void
+swr_set_framebuffer_state(struct pipe_context *pipe,
+                          const struct pipe_framebuffer_state *fb)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   boolean changed = !util_framebuffer_state_equal(&ctx->framebuffer, fb);
+
+   assert(fb->width <= KNOB_GUARDBAND_WIDTH);
+   assert(fb->height <= KNOB_GUARDBAND_HEIGHT);
+
+   if (changed) {
+      unsigned i;
+      for (i = 0; i < fb->nr_cbufs; ++i)
+         pipe_surface_reference(&ctx->framebuffer.cbufs[i], fb->cbufs[i]);
+      for (; i < ctx->framebuffer.nr_cbufs; ++i)
+         pipe_surface_reference(&ctx->framebuffer.cbufs[i], NULL);
+
+      ctx->framebuffer.nr_cbufs = fb->nr_cbufs;
+
+      ctx->framebuffer.width = fb->width;
+      ctx->framebuffer.height = fb->height;
+
+      pipe_surface_reference(&ctx->framebuffer.zsbuf, fb->zsbuf);
+
+      ctx->dirty |= SWR_NEW_FRAMEBUFFER;
+   }
+}
+
+
+/**
+ * Set the multisample coverage mask.  A changed mask is stored and
+ * flagged through SWR_NEW_RASTERIZER; an unchanged mask is ignored.
+ */
+static void
+swr_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   if (sample_mask == ctx->sample_mask)
+      return;
+
+   ctx->sample_mask = sample_mask;
+   ctx->dirty |= SWR_NEW_RASTERIZER;
+}
+
+
+void
+swr_update_derived(struct swr_context *ctx,
+                   const struct pipe_draw_info *p_draw_info)
+{
+   /* Any state that requires dirty flags to be re-triggered sets this mask */
+   /* For example, user_buffer vertex and index buffers. */
+   unsigned post_update_dirty_flags = 0;
+
+   /* Render Targets */
+   if (ctx->dirty & SWR_NEW_FRAMEBUFFER) {
+      struct pipe_framebuffer_state *fb = &ctx->framebuffer;
+      SWR_SURFACE_STATE *new_attachment[SWR_NUM_ATTACHMENTS] = {0};
+      UINT i;
+
+      /* colorbuffer targets */
+      if (fb->nr_cbufs)
+         for (i = 0; i < fb->nr_cbufs; ++i)
+            if (fb->cbufs[i]) {
+               struct swr_resource *colorBuffer =
+                  swr_resource(fb->cbufs[i]->texture);
+               new_attachment[SWR_ATTACHMENT_COLOR0 + i] = &colorBuffer->swr;
+            }
+
+      /* depth/stencil target */
+      if (fb->zsbuf) {
+         struct swr_resource *depthStencilBuffer =
+            swr_resource(fb->zsbuf->texture);
+         if (depthStencilBuffer->has_depth) {
+            new_attachment[SWR_ATTACHMENT_DEPTH] = &depthStencilBuffer->swr;
+
+            if (depthStencilBuffer->has_stencil)
+               new_attachment[SWR_ATTACHMENT_STENCIL] =
+                  &depthStencilBuffer->secondary;
+
+         } else if (depthStencilBuffer->has_stencil)
+            new_attachment[SWR_ATTACHMENT_STENCIL] = &depthStencilBuffer->swr;
+      }
+
+      /* Make the attachment updates */
+      swr_draw_context *pDC = &ctx->swrDC;
+      SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
+      for (i = 0; i < SWR_NUM_ATTACHMENTS; i++) {
+         void *new_base = nullptr;
+         if (new_attachment[i])
+            new_base = new_attachment[i]->pBaseAddress;
+         
+         /* StoreTile for changed target */
+         if (renderTargets[i].pBaseAddress != new_base) {
+            if (renderTargets[i].pBaseAddress) {
+               enum SWR_TILE_STATE post_state = (new_attachment[i]
+                  ? SWR_TILE_INVALID : SWR_TILE_RESOLVED);
+               swr_store_render_target(ctx, i, post_state);
+            }
+
+            /* Make new attachment */
+            if (new_attachment[i])
+               renderTargets[i] = *new_attachment[i];
+            else
+               if (renderTargets[i].pBaseAddress)
+                  renderTargets[i] = {0};
+         }
+      }
+   }
+
+   /* Raster state */
+   if (ctx->dirty & (SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
+      pipe_rasterizer_state *rasterizer = ctx->rasterizer;
+      pipe_framebuffer_state *fb = &ctx->framebuffer;
+
+      SWR_RASTSTATE *rastState = &ctx->derived.rastState;
+      rastState->cullMode = swr_convert_cull_mode(rasterizer->cull_face);
+      rastState->frontWinding = rasterizer->front_ccw
+         ? SWR_FRONTWINDING_CCW
+         : SWR_FRONTWINDING_CW;
+      rastState->scissorEnable = rasterizer->scissor;
+      rastState->pointSize = rasterizer->point_size > 0.0f
+         ? rasterizer->point_size
+         : 1.0f;
+      rastState->lineWidth = rasterizer->line_width > 0.0f
+         ? rasterizer->line_width
+         : 1.0f;
+
+      rastState->pointParam = rasterizer->point_size_per_vertex;
+
+      rastState->pointSpriteEnable = rasterizer->sprite_coord_enable;
+      rastState->pointSpriteTopOrigin =
+         rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT;
+
+      /* XXX TODO: Add multisample */
+      rastState->msaaRastEnable = false;
+      rastState->rastMode = SWR_MSAA_RASTMODE_OFF_PIXEL;
+      rastState->sampleCount = SWR_MULTISAMPLE_1X;
+      rastState->bForcedSampleCount = false;
+
+      bool do_offset = false;
+      switch (rasterizer->fill_front) {
+      case PIPE_POLYGON_MODE_FILL:
+         do_offset = rasterizer->offset_tri;
+         break;
+      case PIPE_POLYGON_MODE_LINE:
+         do_offset = rasterizer->offset_line;
+         break;
+      case PIPE_POLYGON_MODE_POINT:
+         do_offset = rasterizer->offset_point;
+         break;
+      }
+
+      if (do_offset) {
+         rastState->depthBias = rasterizer->offset_units;
+         rastState->slopeScaledDepthBias = rasterizer->offset_scale;
+         rastState->depthBiasClamp = rasterizer->offset_clamp;
+      } else {
+         rastState->depthBias = 0;
+         rastState->slopeScaledDepthBias = 0;
+         rastState->depthBiasClamp = 0;
+      }
+      struct pipe_surface *zb = fb->zsbuf;
+      if (zb && swr_resource(zb->texture)->has_depth)
+         rastState->depthFormat = swr_resource(zb->texture)->swr.format;
+
+      rastState->depthClipEnable = rasterizer->depth_clip;
+
+      SwrSetRastState(ctx->swrContext, rastState);
+   }
+
+   /* Scissor */
+   if (ctx->dirty & SWR_NEW_SCISSOR) {
+      pipe_scissor_state *scissor = &ctx->scissor;
+      BBOX bbox(scissor->miny, scissor->maxy,
+                scissor->minx, scissor->maxx);
+      SwrSetScissorRects(ctx->swrContext, 1, &bbox);
+   }
+
+   /* Viewport */
+   if (ctx->dirty & (SWR_NEW_VIEWPORT | SWR_NEW_FRAMEBUFFER
+                     | SWR_NEW_RASTERIZER)) {
+      pipe_viewport_state *state = &ctx->viewport;
+      pipe_framebuffer_state *fb = &ctx->framebuffer;
+      pipe_rasterizer_state *rasterizer = ctx->rasterizer;
+
+      SWR_VIEWPORT *vp = &ctx->derived.vp;
+      SWR_VIEWPORT_MATRIX *vpm = &ctx->derived.vpm;
+
+      vp->x = state->translate[0] - state->scale[0];
+      vp->width = state->translate[0] + state->scale[0];
+      vp->y = state->translate[1] - fabs(state->scale[1]);
+      vp->height = state->translate[1] + fabs(state->scale[1]);
+      if (rasterizer->clip_halfz == 0) {
+         vp->minZ = state->translate[2] - state->scale[2];
+         vp->maxZ = state->translate[2] + state->scale[2];
+      } else {
+         vp->minZ = state->translate[2];
+         vp->maxZ = state->translate[2] + state->scale[2];
+      }
+
+      vpm->m00 = state->scale[0];
+      vpm->m11 = state->scale[1];
+      vpm->m22 = state->scale[2];
+      vpm->m30 = state->translate[0];
+      vpm->m31 = state->translate[1];
+      vpm->m32 = state->translate[2];
+
+      /* Now that the matrix is calculated, clip the view coords to screen
+       * size.  OpenGL allows for -ve x,y in the viewport.
+       */
+      vp->x = std::max(vp->x, 0.0f);
+      vp->y = std::max(vp->y, 0.0f);
+      vp->width = std::min(vp->width, (float)fb->width);
+      vp->height = std::min(vp->height, (float)fb->height);
+
+      SwrSetViewports(ctx->swrContext, 1, vp, vpm);
+   }
+
+   /* Set vertex & index buffers */
+   /* (using draw info if called by swr_draw_vbo) */
+   if (ctx->dirty & SWR_NEW_VERTEX) {
+      uint32_t size, pitch, max_vertex, partial_inbounds;
+      const uint8_t *p_data;
+
+      /* If being called by swr_draw_vbo, copy draw details */
+      struct pipe_draw_info info = {0};
+      if (p_draw_info)
+         info = *p_draw_info;
+
+      /* vertex buffers */
+      SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
+      for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
+         pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
+
+         pitch = vb->stride;
+         if (!vb->user_buffer) {
+            /* VBO
+             * size is based on buffer->width0 rather than info.max_index
+             * to prevent having to validate VBO on each draw */
+            size = vb->buffer->width0;
+            max_vertex = size / pitch;
+            partial_inbounds = size % pitch;
+
+            p_data = (const uint8_t *)swr_resource_data(vb->buffer)
+               + vb->buffer_offset;
+         } else {
+            /* Client buffer
+             * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
+             * revalidate on each draw */
+            post_update_dirty_flags |= SWR_NEW_VERTEX;
+
+            if (pitch) {
+               size = (info.max_index - info.min_index + 1) * pitch;
+            } else {
+               /* pitch = 0, means constant value
+                * set size to 1 vertex */
+               size = ctx->velems->stream_pitch[i];
+            }
+
+            max_vertex = info.max_index + 1;
+            partial_inbounds = 0;
+
+            /* Copy only needed vertices to scratch space */
+            size = AlignUp(size, 4);
+            const void *ptr = (const uint8_t *) vb->user_buffer
+               + info.min_index * pitch;
+            ptr = swr_copy_to_scratch_space(
+               ctx, &ctx->scratch->vertex_buffer, ptr, size);
+            p_data = (const uint8_t *)ptr - info.min_index * pitch;
+         }
+
+         swrVertexBuffers[i] = {0};
+         swrVertexBuffers[i].index = i;
+         swrVertexBuffers[i].pitch = pitch;
+         swrVertexBuffers[i].pData = p_data;
+         swrVertexBuffers[i].size = size;
+         swrVertexBuffers[i].maxVertex = max_vertex;
+         swrVertexBuffers[i].partialInboundsSize = partial_inbounds;
+      }
+
+      SwrSetVertexBuffers(
+         ctx->swrContext, ctx->num_vertex_buffers, swrVertexBuffers);
+
+      /* index buffer, if required (info passed in by swr_draw_vbo) */
+      SWR_FORMAT index_type = R32_UINT; /* Default for non-indexed draws */
+      if (info.indexed) {
+         pipe_index_buffer *ib = &ctx->index_buffer;
+
+         pitch = ib->index_size ? ib->index_size : sizeof(uint32_t);
+         index_type = swr_convert_index_type(pitch);
+
+         if (!ib->user_buffer) {
+            /* VBO
+             * size is based on buffer->width0 rather than info.count
+             * to prevent having to validate VBO on each draw */
+            size = ib->buffer->width0;
+            p_data =
+               (const uint8_t *)swr_resource_data(ib->buffer) + ib->offset;
+         } else {
+            /* Client buffer
+             * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
+             * revalidate on each draw */
+            post_update_dirty_flags |= SWR_NEW_VERTEX;
+
+            size = info.count * pitch;
+            size = AlignUp(size, 4);
+
+            /* Copy indices to scratch space */
+            const void *ptr = ib->user_buffer;
+            ptr = swr_copy_to_scratch_space(
+               ctx, &ctx->scratch->index_buffer, ptr, size);
+            p_data = (const uint8_t *)ptr;
+         }
+
+         SWR_INDEX_BUFFER_STATE swrIndexBuffer;
+         swrIndexBuffer.format = swr_convert_index_type(ib->index_size);
+         swrIndexBuffer.pIndices = p_data;
+         swrIndexBuffer.size = size;
+
+         SwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer);
+      }
+
+      struct swr_vertex_element_state *velems = ctx->velems;
+      if (velems && velems->fsState.indexType != index_type) {
+         velems->fsFunc = NULL;
+         velems->fsState.indexType = index_type;
+      }
+   }
+
+   /* VertexShader */
+   if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_FRAMEBUFFER)) {
+      SwrSetVertexFunc(ctx->swrContext, ctx->vs->func);
+   }
+
+   swr_jit_key key;
+   if (ctx->dirty & (SWR_NEW_FS | SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW
+                     | SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
+      memset(&key, 0, sizeof(key));
+      swr_generate_fs_key(key, ctx, ctx->fs);
+      auto search = ctx->fs->map.find(key);
+      PFN_PIXEL_KERNEL func;
+      if (search != ctx->fs->map.end()) {
+         func = search->second;
+      } else {
+         func = swr_compile_fs(ctx, key);
+         ctx->fs->map.insert(std::make_pair(key, func));
+      }
+      SWR_PS_STATE psState = {0};
+      psState.pfnPixelShader = func;
+      psState.killsPixel = ctx->fs->info.base.uses_kill;
+      psState.inputCoverage = SWR_INPUT_COVERAGE_NORMAL;
+      psState.writesODepth = ctx->fs->info.base.writes_z;
+      psState.usesSourceDepth = ctx->fs->info.base.reads_z;
+      psState.shadingRate = SWR_SHADING_RATE_PIXEL; // XXX
+      psState.numRenderTargets = ctx->framebuffer.nr_cbufs;
+      psState.posOffset = SWR_PS_POSITION_SAMPLE_NONE; // XXX msaa
+      uint32_t barycentricsMask = 0;
+#if 0
+      // when we switch to mesa-master
+      if (ctx->fs->info.base.uses_persp_center ||
+          ctx->fs->info.base.uses_linear_center)
+         barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
+      if (ctx->fs->info.base.uses_persp_centroid ||
+          ctx->fs->info.base.uses_linear_centroid)
+         barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
+      if (ctx->fs->info.base.uses_persp_sample ||
+          ctx->fs->info.base.uses_linear_sample)
+         barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
+#else
+      for (unsigned i = 0; i < ctx->fs->info.base.num_inputs; i++) {
+         switch (ctx->fs->info.base.input_interpolate_loc[i]) {
+         case TGSI_INTERPOLATE_LOC_CENTER:
+            barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
+            break;
+         case TGSI_INTERPOLATE_LOC_CENTROID:
+            barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
+            break;
+         case TGSI_INTERPOLATE_LOC_SAMPLE:
+            barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
+            break;
+         }
+      }
+#endif
+      psState.barycentricsMask = barycentricsMask;
+      psState.usesUAV = false; // XXX
+      psState.forceEarlyZ = false;
+      SwrSetPixelShaderState(ctx->swrContext, &psState);
+   }
+
+   /* JIT sampler state */
+   if (ctx->dirty & SWR_NEW_SAMPLER) {
+      swr_draw_context *pDC = &ctx->swrDC;
+
+      for (unsigned i = 0; i < key.nr_samplers; i++) {
+         const struct pipe_sampler_state *sampler =
+            ctx->samplers[PIPE_SHADER_FRAGMENT][i];
+
+         if (sampler) {
+            pDC->samplersFS[i].min_lod = sampler->min_lod;
+            pDC->samplersFS[i].max_lod = sampler->max_lod;
+            pDC->samplersFS[i].lod_bias = sampler->lod_bias;
+            COPY_4V(pDC->samplersFS[i].border_color, sampler->border_color.f);
+         }
+      }
+   }
+
+   /* JIT sampler view state */
+   if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
+      swr_draw_context *pDC = &ctx->swrDC;
+
+      for (unsigned i = 0; i < key.nr_sampler_views; i++) {
+         struct pipe_sampler_view *view =
+            ctx->sampler_views[PIPE_SHADER_FRAGMENT][i];
+
+         if (view) {
+            struct pipe_resource *res = view->texture;
+            struct swr_resource *swr_res = swr_resource(res);
+            struct swr_jit_texture *jit_tex = &pDC->texturesFS[i];
+            memset(jit_tex, 0, sizeof(*jit_tex));
+            jit_tex->width = res->width0;
+            jit_tex->height = res->height0;
+            jit_tex->depth = res->depth0;
+            jit_tex->first_level = view->u.tex.first_level;
+            jit_tex->last_level = view->u.tex.last_level;
+            jit_tex->base_ptr = swr_res->swr.pBaseAddress;
+
+            for (unsigned level = jit_tex->first_level;
+                 level <= jit_tex->last_level;
+                 level++) {
+               jit_tex->row_stride[level] = swr_res->row_stride[level];
+               jit_tex->img_stride[level] = swr_res->img_stride[level];
+               jit_tex->mip_offsets[level] = swr_res->mip_offsets[level];
+            }
+         }
+      }
+   }
+
+   /* VertexShader Constants */
+   if (ctx->dirty & SWR_NEW_VSCONSTANTS) {
+      swr_draw_context *pDC = &ctx->swrDC;
+
+      for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
+         const pipe_constant_buffer *cb =
+            &ctx->constants[PIPE_SHADER_VERTEX][i];
+         pDC->num_constantsVS[i] = cb->buffer_size;
+         if (cb->buffer)
+            pDC->constantVS[i] =
+               (const float *)((const BYTE *)cb->buffer + cb->buffer_offset);
+         else {
+            /* Need to copy these constants to scratch space */
+            if (cb->user_buffer && cb->buffer_size) {
+               const void *ptr =
+                  ((const BYTE *)cb->user_buffer + cb->buffer_offset);
+               uint32_t size = AlignUp(cb->buffer_size, 4);
+               ptr = swr_copy_to_scratch_space(
+                  ctx, &ctx->scratch->vs_constants, ptr, size);
+               pDC->constantVS[i] = (const float *)ptr;
+            }
+         }
+      }
+   }
+
+   /* FragmentShader Constants */
+   if (ctx->dirty & SWR_NEW_FSCONSTANTS) {
+      swr_draw_context *pDC = &ctx->swrDC;
+
+      for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
+         const pipe_constant_buffer *cb =
+            &ctx->constants[PIPE_SHADER_FRAGMENT][i];
+         pDC->num_constantsFS[i] = cb->buffer_size;
+         if (cb->buffer)
+            pDC->constantFS[i] =
+               (const float *)((const BYTE *)cb->buffer + cb->buffer_offset);
+         else {
+            /* Need to copy these constants to scratch space */
+            if (cb->user_buffer && cb->buffer_size) {
+               const void *ptr =
+                  ((const BYTE *)cb->user_buffer + cb->buffer_offset);
+               uint32_t size = AlignUp(cb->buffer_size, 4);
+               ptr = swr_copy_to_scratch_space(
+                  ctx, &ctx->scratch->fs_constants, ptr, size);
+               pDC->constantFS[i] = (const float *)ptr;
+            }
+         }
+      }
+   }
+
+   /* Depth/stencil state */
+   if (ctx->dirty & (SWR_NEW_DEPTH_STENCIL_ALPHA | SWR_NEW_FRAMEBUFFER)) {
+      struct pipe_depth_state *depth = &(ctx->depth_stencil->depth);
+      struct pipe_stencil_state *stencil = ctx->depth_stencil->stencil;
+      SWR_DEPTH_STENCIL_STATE depthStencilState = {{0}};
+
+      /* XXX, incomplete.  Need to flesh out stencil & alpha test state
+      struct pipe_stencil_state *front_stencil =
+      ctx->depth_stencil.stencil[0];
+      struct pipe_stencil_state *back_stencil = ctx->depth_stencil.stencil[1];
+      struct pipe_alpha_state alpha;
+      */
+      if (stencil[0].enabled) {
+         depthStencilState.stencilWriteEnable = 1;
+         depthStencilState.stencilTestEnable = 1;
+         depthStencilState.stencilTestFunc =
+            swr_convert_depth_func(stencil[0].func);
+
+         depthStencilState.stencilPassDepthPassOp =
+            swr_convert_stencil_op(stencil[0].zpass_op);
+         depthStencilState.stencilPassDepthFailOp =
+            swr_convert_stencil_op(stencil[0].zfail_op);
+         depthStencilState.stencilFailOp =
+            swr_convert_stencil_op(stencil[0].fail_op);
+         depthStencilState.stencilWriteMask = stencil[0].writemask;
+         depthStencilState.stencilTestMask = stencil[0].valuemask;
+         depthStencilState.stencilRefValue = ctx->stencil_ref.ref_value[0];
+      }
+      if (stencil[1].enabled) {
+         depthStencilState.doubleSidedStencilTestEnable = 1;
+
+         depthStencilState.backfaceStencilTestFunc =
+            swr_convert_depth_func(stencil[1].func);
+
+         depthStencilState.backfaceStencilPassDepthPassOp =
+            swr_convert_stencil_op(stencil[1].zpass_op);
+         depthStencilState.backfaceStencilPassDepthFailOp =
+            swr_convert_stencil_op(stencil[1].zfail_op);
+         depthStencilState.backfaceStencilFailOp =
+            swr_convert_stencil_op(stencil[1].fail_op);
+         depthStencilState.backfaceStencilWriteMask = stencil[1].writemask;
+         depthStencilState.backfaceStencilTestMask = stencil[1].valuemask;
+
+         depthStencilState.backfaceStencilRefValue =
+            ctx->stencil_ref.ref_value[1];
+      }
+
+      depthStencilState.depthTestEnable = depth->enabled;
+      depthStencilState.depthTestFunc = swr_convert_depth_func(depth->func);
+      depthStencilState.depthWriteEnable = depth->writemask;
+      SwrSetDepthStencilState(ctx->swrContext, &depthStencilState);
+   }
+
+   /* Blend State */
+   if (ctx->dirty & (SWR_NEW_BLEND |
+                     SWR_NEW_FRAMEBUFFER |
+                     SWR_NEW_DEPTH_STENCIL_ALPHA)) {
+      struct pipe_framebuffer_state *fb = &ctx->framebuffer;
+
+      SWR_BLEND_STATE blendState;
+      memcpy(&blendState, &ctx->blend->blendState, sizeof(blendState));
+      blendState.constantColor[0] = ctx->blend_color.color[0];
+      blendState.constantColor[1] = ctx->blend_color.color[1];
+      blendState.constantColor[2] = ctx->blend_color.color[2];
+      blendState.constantColor[3] = ctx->blend_color.color[3];
+      blendState.alphaTestReference =
+         *((uint32_t*)&ctx->depth_stencil->alpha.ref_value);
+
+      // XXX MSAA
+      blendState.sampleMask = 0;
+      blendState.sampleCount = SWR_MULTISAMPLE_1X;
+
+      /* If there are no color buffers bound, disable writes on RT0
+       * and skip loop */
+      if (fb->nr_cbufs == 0) {
+         blendState.renderTarget[0].writeDisableRed = 1;
+         blendState.renderTarget[0].writeDisableGreen = 1;
+         blendState.renderTarget[0].writeDisableBlue = 1;
+         blendState.renderTarget[0].writeDisableAlpha = 1;
+         SwrSetBlendFunc(ctx->swrContext, 0, NULL);
+      }
+      else
+         for (int target = 0;
+               target < std::min(SWR_NUM_RENDERTARGETS,
+                                 PIPE_MAX_COLOR_BUFS);
+               target++) {
+            if (!fb->cbufs[target])
+               continue;
+
+            struct swr_resource *colorBuffer =
+               swr_resource(fb->cbufs[target]->texture);
+
+            BLEND_COMPILE_STATE compileState;
+            memset(&compileState, 0, sizeof(compileState));
+            compileState.format = colorBuffer->swr.format;
+            memcpy(&compileState.blendState,
+                   &ctx->blend->compileState[target],
+                   sizeof(compileState.blendState));
+
+            if (compileState.blendState.blendEnable == false &&
+                compileState.blendState.logicOpEnable == false) {
+               SwrSetBlendFunc(ctx->swrContext, target, NULL);
+               continue;
+            }
+
+            compileState.desc.alphaTestEnable =
+               ctx->depth_stencil->alpha.enabled;
+            compileState.desc.independentAlphaBlendEnable =
+               ctx->blend->pipe.independent_blend_enable;
+            compileState.desc.alphaToCoverageEnable =
+               ctx->blend->pipe.alpha_to_coverage;
+            compileState.desc.sampleMaskEnable = 0; // XXX
+            compileState.desc.numSamples = 1; // XXX
+
+            compileState.alphaTestFunction =
+               swr_convert_depth_func(ctx->depth_stencil->alpha.func);
+            compileState.alphaTestFormat = ALPHA_TEST_FLOAT32; // xxx
+
+            PFN_BLEND_JIT_FUNC func = NULL;
+            auto search = ctx->blendJIT->find(compileState);
+            if (search != ctx->blendJIT->end()) {
+               func = search->second;
+            } else {
+               HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
+               func = JitCompileBlend(hJitMgr, compileState);
+               debug_printf("BLEND shader %p\n", func);
+               assert(func && "Error: BlendShader = NULL");
+
+               ctx->blendJIT->insert(std::make_pair(compileState, func));
+            }
+            SwrSetBlendFunc(ctx->swrContext, target, func);
+         }
+
+      SwrSetBlendState(ctx->swrContext, &blendState);
+   }
+
+   if (ctx->dirty & SWR_NEW_STIPPLE) {
+      /* XXX What to do with this one??? SWR doesn't stipple */
+   }
+
+   if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_SO | SWR_NEW_RASTERIZER)) {
+      ctx->vs->soState.rasterizerDisable =
+         ctx->rasterizer->rasterizer_discard;
+      SwrSetSoState(ctx->swrContext, &ctx->vs->soState);
+
+      pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output;
+
+      for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
+         SWR_STREAMOUT_BUFFER buffer = {0};
+         if (!ctx->so_targets[i])
+            continue;
+         buffer.enable = true;
+         buffer.pBuffer =
+            (uint32_t *)swr_resource_data(ctx->so_targets[i]->buffer);
+         buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
+         buffer.pitch = stream_output->stride[i];
+         buffer.streamOffset = ctx->so_targets[i]->buffer_offset >> 2;
+
+         SwrSetSoBuffers(ctx->swrContext, &buffer, i);
+      }
+   }
+
+   uint32_t linkage = ctx->vs->linkageMask;
+   if (ctx->rasterizer->sprite_coord_enable)
+      linkage |= (1 << ctx->vs->info.base.num_outputs);
+
+   SwrSetLinkage(ctx->swrContext, linkage, NULL);
+
+   // set up frontend state
+   SWR_FRONTEND_STATE feState = {0};
+   SwrSetFrontendState(ctx->swrContext, &feState);
+
+   // set up backend state
+   SWR_BACKEND_STATE backendState = {0};
+   backendState.numAttributes = 1;
+   backendState.numComponents[0] = 4;
+   backendState.constantInterpolationMask = ctx->fs->constantMask;
+   backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask;
+
+   SwrSetBackendState(ctx->swrContext, &backendState);
+
+   ctx->dirty = post_update_dirty_flags;
+}
+
+static struct pipe_stream_output_target *
+swr_create_so_target(struct pipe_context *pipe,
+                     struct pipe_resource *buffer,
+                     unsigned buffer_offset,
+                     unsigned buffer_size)
+{
+   struct pipe_stream_output_target *target;
+
+   target = CALLOC_STRUCT(pipe_stream_output_target);
+   if (!target)
+      return NULL;
+
+   target->context = pipe;
+   target->reference.count = 1;
+   pipe_resource_reference(&target->buffer, buffer);
+   target->buffer_offset = buffer_offset;
+   target->buffer_size = buffer_size;
+   return target;
+}
+
+static void
+swr_destroy_so_target(struct pipe_context *pipe,
+                      struct pipe_stream_output_target *target)
+{
+   pipe_resource_reference(&target->buffer, NULL); /* unbind target's buffer */
+   FREE(target); /* then the target itself; `pipe` is not used here */
+}
+
+static void
+swr_set_so_targets(struct pipe_context *pipe,
+                   unsigned num_targets,
+                   struct pipe_stream_output_target **targets,
+                   const unsigned *offsets)
+{
+   struct swr_context *swr = swr_context(pipe);
+   uint32_t i;
+
+   assert(num_targets < MAX_SO_STREAMS);
+
+   for (i = 0; i < num_targets; i++) {
+      pipe_so_target_reference(
+         (struct pipe_stream_output_target **)&swr->so_targets[i],
+         targets[i]);
+   }
+
+   for (/* fall-through */; i < swr->num_so_targets; i++) {
+      pipe_so_target_reference(
+         (struct pipe_stream_output_target **)&swr->so_targets[i], NULL);
+   }
+
+   swr->num_so_targets = num_targets;
+
+   swr->dirty = SWR_NEW_SO;
+}
+
+
+void
+swr_state_init(struct pipe_context *pipe)
+{
+   pipe->create_blend_state = swr_create_blend_state;
+   pipe->bind_blend_state = swr_bind_blend_state;
+   pipe->delete_blend_state = swr_delete_blend_state;
+
+   pipe->create_depth_stencil_alpha_state = swr_create_depth_stencil_state;
+   pipe->bind_depth_stencil_alpha_state = swr_bind_depth_stencil_state;
+   pipe->delete_depth_stencil_alpha_state = swr_delete_depth_stencil_state;
+
+   pipe->create_rasterizer_state = swr_create_rasterizer_state;
+   pipe->bind_rasterizer_state = swr_bind_rasterizer_state;
+   pipe->delete_rasterizer_state = swr_delete_rasterizer_state;
+
+   pipe->create_sampler_state = swr_create_sampler_state;
+   pipe->bind_sampler_states = swr_bind_sampler_states;
+   pipe->delete_sampler_state = swr_delete_sampler_state;
+
+   pipe->create_sampler_view = swr_create_sampler_view;
+   pipe->set_sampler_views = swr_set_sampler_views;
+   pipe->sampler_view_destroy = swr_sampler_view_destroy;
+
+   pipe->create_vs_state = swr_create_vs_state;
+   pipe->bind_vs_state = swr_bind_vs_state;
+   pipe->delete_vs_state = swr_delete_vs_state;
+
+   pipe->create_fs_state = swr_create_fs_state;
+   pipe->bind_fs_state = swr_bind_fs_state;
+   pipe->delete_fs_state = swr_delete_fs_state;
+
+   pipe->set_constant_buffer = swr_set_constant_buffer;
+
+   pipe->create_vertex_elements_state = swr_create_vertex_elements_state;
+   pipe->bind_vertex_elements_state = swr_bind_vertex_elements_state;
+   pipe->delete_vertex_elements_state = swr_delete_vertex_elements_state;
+
+   pipe->set_vertex_buffers = swr_set_vertex_buffers;
+   pipe->set_index_buffer = swr_set_index_buffer;
+
+   pipe->set_polygon_stipple = swr_set_polygon_stipple;
+   pipe->set_clip_state = swr_set_clip_state;
+   pipe->set_scissor_states = swr_set_scissor_states;
+   pipe->set_viewport_states = swr_set_viewport_states;
+
+   pipe->set_framebuffer_state = swr_set_framebuffer_state;
+
+   pipe->set_blend_color = swr_set_blend_color;
+   pipe->set_stencil_ref = swr_set_stencil_ref;
+
+   pipe->set_sample_mask = swr_set_sample_mask;
+
+   pipe->create_stream_output_target = swr_create_so_target;
+   pipe->stream_output_target_destroy = swr_destroy_so_target;
+   pipe->set_stream_output_targets = swr_set_so_targets;
+}
diff --git a/src/gallium/drivers/swr/swr_state.h b/src/gallium/drivers/swr/swr_state.h
new file mode 100644 (file)
index 0000000..a2b4d80
--- /dev/null
@@ -0,0 +1,307 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#ifndef SWR_STATE_H
+#define SWR_STATE_H
+
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "gallivm/lp_bld_tgsi.h"
+#include "util/u_hash.h"
+#include "api.h"
+#include "swr_tex_sample.h"
+#include "swr_shader.h"
+#include <unordered_map>
+
+/* skeleton
+ *
+ * Driver-side vertex shader object.  Uses visible in the state-update
+ * code of swr_state.cpp:
+ *   pipe        - pipe.stream_output.stride[] supplies stream-out pitches
+ *   info        - info.base.num_outputs selects the extra linkage bit
+ *                 that is set when sprite_coord_enable is on
+ *   linkageMask - base mask handed to SwrSetLinkage
+ *   func        - handed to SwrSetVertexFunc
+ *   soState     - handed to SwrSetSoState after rasterizerDisable has
+ *                 been refreshed from the rasterizer's rasterizer_discard
+ *   soFunc      - presumably one stream-out function per primitive type
+ *                 (array is sized PIPE_PRIM_MAX) -- TODO confirm, its
+ *                 users are not visible here
+ */
+struct swr_vertex_shader {
+   struct pipe_shader_state pipe;
+   struct lp_tgsi_info info;
+   unsigned linkageMask;
+   PFN_VERTEX_FUNC func;
+   SWR_STREAMOUT_STATE soState;
+   PFN_SO_FUNC soFunc[PIPE_PRIM_MAX];
+};
+
+struct swr_fragment_shader { /* driver-side fragment shader object */
+   struct pipe_shader_state pipe;
+   struct lp_tgsi_info info; /* info.base.* feeds SWR_PS_STATE setup */
+   uint32_t constantMask; /* -> backendState.constantInterpolationMask */
+   uint32_t pointSpriteMask; /* -> backendState.pointSpriteTexCoordMask */
+   std::unordered_map<swr_jit_key, PFN_PIXEL_KERNEL> map; /* kernel cache:
+      the state-update code in swr_state.cpp looks up the pixel kernel for
+      a swr_jit_key here and, on a miss, inserts the result of
+      swr_compile_fs. */
+};
+
+/* Vertex element state
+ *
+ * fsState is the FETCH_COMPILE_STATE and fsFunc the fetch function
+ * pointer stored alongside it.  The state-update code in swr_state.cpp
+ * compares fsState.indexType with the index format of the current draw
+ * and, on a mismatch, stores the new type and resets fsFunc to NULL
+ * (presumably so the fetch function gets rebuilt -- TODO confirm).
+ * stream_pitch is sized PIPE_MAX_ATTRIBS; its users are not visible here.
+ */
+struct swr_vertex_element_state {
+   FETCH_COMPILE_STATE fsState;
+   PFN_FETCH_FUNC fsFunc;
+   uint32_t stream_pitch[PIPE_MAX_ATTRIBS];
+};
+
+struct swr_blend_state { /* driver-side blend object */
+   struct pipe_blend_state pipe; /* independent_blend_enable and
+                                  * alpha_to_coverage are read from it */
+   SWR_BLEND_STATE blendState; /* copied to a local on each blend update,
+                                * then constant color etc. are patched */
+   RENDER_TARGET_BLEND_COMPILE_STATE compileState[PIPE_MAX_COLOR_BUFS]; /*
+      indexed by render target; each entry is copied into the
+      BLEND_COMPILE_STATE used as the blend JIT lookup key */
+};
+
+/*
+ * Derived SWR API DrawState
+ * For convenience of making simple changes without re-deriving state.
+ *
+ * Bundles the SWR rasterizer state, viewport and viewport matrix
+ * structures.  NOTE(review): the code that fills and consumes these
+ * fields is outside this part of the file -- confirm usage there.
+ */
+struct swr_derived_state {
+   SWR_RASTSTATE rastState;
+   SWR_VIEWPORT vp;
+   SWR_VIEWPORT_MATRIX vpm;
+};
+
+void swr_update_derived(struct swr_context *,
+                        const struct pipe_draw_info * = nullptr);
+
+/*
+ * Conversion functions: Convert mesa state defines to SWR.
+ */
+
+static INLINE SWR_LOGIC_OP
+swr_convert_logic_op(const UINT op)
+{
+   switch (op) {
+   case PIPE_LOGICOP_CLEAR:
+      return LOGICOP_CLEAR;
+   case PIPE_LOGICOP_NOR:
+      return LOGICOP_NOR;
+   case PIPE_LOGICOP_AND_INVERTED:
+      return LOGICOP_CLEAR;
+   case PIPE_LOGICOP_COPY_INVERTED:
+      return LOGICOP_COPY_INVERTED;
+   case PIPE_LOGICOP_AND_REVERSE:
+      return LOGICOP_AND_REVERSE;
+   case PIPE_LOGICOP_INVERT:
+      return LOGICOP_INVERT;
+   case PIPE_LOGICOP_XOR:
+      return LOGICOP_XOR;
+   case PIPE_LOGICOP_NAND:
+      return LOGICOP_NAND;
+   case PIPE_LOGICOP_AND:
+      return LOGICOP_AND;
+   case PIPE_LOGICOP_EQUIV:
+      return LOGICOP_EQUIV;
+   case PIPE_LOGICOP_NOOP:
+      return LOGICOP_NOOP;
+   case PIPE_LOGICOP_OR_INVERTED:
+      return LOGICOP_OR_INVERTED;
+   case PIPE_LOGICOP_COPY:
+      return LOGICOP_COPY;
+   case PIPE_LOGICOP_OR_REVERSE:
+      return LOGICOP_OR_REVERSE;
+   case PIPE_LOGICOP_OR:
+      return LOGICOP_OR;
+   case PIPE_LOGICOP_SET:
+      return LOGICOP_SET;
+   default:
+      assert(0 && "Unsupported logic op");
+      return LOGICOP_NOOP;
+   }
+}
+
+static INLINE SWR_STENCILOP
+swr_convert_stencil_op(const UINT op)
+{
+   switch (op) {
+   case PIPE_STENCIL_OP_KEEP:
+      return STENCILOP_KEEP;
+   case PIPE_STENCIL_OP_ZERO:
+      return STENCILOP_ZERO;
+   case PIPE_STENCIL_OP_REPLACE:
+      return STENCILOP_REPLACE;
+   case PIPE_STENCIL_OP_INCR:
+      return STENCILOP_INCRSAT;
+   case PIPE_STENCIL_OP_DECR:
+      return STENCILOP_DECRSAT;
+   case PIPE_STENCIL_OP_INCR_WRAP:
+      return STENCILOP_INCR;
+   case PIPE_STENCIL_OP_DECR_WRAP:
+      return STENCILOP_DECR;
+   case PIPE_STENCIL_OP_INVERT:
+      return STENCILOP_INVERT;
+   default:
+      assert(0 && "Unsupported stencil op");
+      return STENCILOP_KEEP;
+   }
+}
+
+/*
+ * Pick the SWR index format for an index size given in bytes:
+ * sizeof(unsigned char/short/int) select R8/R16/R32_UINT.  Any other
+ * size asserts and yields R32_UINT.
+ */
+static INLINE SWR_FORMAT
+swr_convert_index_type(const UINT index_size)
+{
+   if (index_size == sizeof(unsigned char))
+      return R8_UINT;
+   if (index_size == sizeof(unsigned short))
+      return R16_UINT;
+   if (index_size == sizeof(unsigned int))
+      return R32_UINT;
+
+   assert(0 && "Unsupported index type");
+   return R32_UINT;
+}
+
+
+static INLINE SWR_ZFUNCTION
+swr_convert_depth_func(const UINT pipe_func)
+{
+   switch (pipe_func) {
+   case PIPE_FUNC_NEVER:
+      return ZFUNC_NEVER;
+   case PIPE_FUNC_LESS:
+      return ZFUNC_LT;
+   case PIPE_FUNC_EQUAL:
+      return ZFUNC_EQ;
+   case PIPE_FUNC_LEQUAL:
+      return ZFUNC_LE;
+   case PIPE_FUNC_GREATER:
+      return ZFUNC_GT;
+   case PIPE_FUNC_NOTEQUAL:
+      return ZFUNC_NE;
+   case PIPE_FUNC_GEQUAL:
+      return ZFUNC_GE;
+   case PIPE_FUNC_ALWAYS:
+      return ZFUNC_ALWAYS;
+   default:
+      assert(0 && "Unsupported depth func");
+      return ZFUNC_ALWAYS;
+   }
+}
+
+
+/*
+ * Translate a gallium PIPE_FACE_* cull selection to an SWR_CULLMODE.
+ * An unrecognized value asserts and yields SWR_CULLMODE_NONE.
+ */
+static INLINE SWR_CULLMODE
+swr_convert_cull_mode(const UINT cull_face)
+{
+   SWR_CULLMODE mode = SWR_CULLMODE_NONE;
+
+   switch (cull_face) {
+   case PIPE_FACE_NONE:
+      mode = SWR_CULLMODE_NONE;
+      break;
+   case PIPE_FACE_FRONT:
+      mode = SWR_CULLMODE_FRONT;
+      break;
+   case PIPE_FACE_BACK:
+      mode = SWR_CULLMODE_BACK;
+      break;
+   case PIPE_FACE_FRONT_AND_BACK:
+      mode = SWR_CULLMODE_BOTH;
+      break;
+   default:
+      assert(0 && "Invalid cull mode");
+      break;
+   }
+
+   return mode;
+}
+
+/*
+ * Translate a gallium PIPE_BLEND_* equation to an SWR_BLEND_OP.
+ * An unrecognized value asserts and yields BLENDOP_ADD.
+ */
+static INLINE SWR_BLEND_OP
+swr_convert_blend_func(const UINT blend_func)
+{
+   SWR_BLEND_OP blend_op = BLENDOP_ADD;
+
+   switch (blend_func) {
+   case PIPE_BLEND_ADD:
+      blend_op = BLENDOP_ADD;
+      break;
+   case PIPE_BLEND_SUBTRACT:
+      blend_op = BLENDOP_SUBTRACT;
+      break;
+   case PIPE_BLEND_REVERSE_SUBTRACT:
+      blend_op = BLENDOP_REVSUBTRACT;
+      break;
+   case PIPE_BLEND_MIN:
+      blend_op = BLENDOP_MIN;
+      break;
+   case PIPE_BLEND_MAX:
+      blend_op = BLENDOP_MAX;
+      break;
+   default:
+      assert(0 && "Invalid blend func");
+      break;
+   }
+
+   return blend_op;
+}
+
+static INLINE SWR_BLEND_FACTOR
+swr_convert_blend_factor(const UINT blend_factor)
+{
+   switch (blend_factor) {
+   case PIPE_BLENDFACTOR_ONE:
+      return BLENDFACTOR_ONE;
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+      return BLENDFACTOR_SRC_COLOR;
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      return BLENDFACTOR_SRC_ALPHA;
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      return BLENDFACTOR_DST_ALPHA;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+      return BLENDFACTOR_DST_COLOR;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      return BLENDFACTOR_SRC_ALPHA_SATURATE;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+      return BLENDFACTOR_CONST_COLOR;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      return BLENDFACTOR_CONST_ALPHA;
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+      return BLENDFACTOR_SRC1_COLOR;
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+      return BLENDFACTOR_SRC1_ALPHA;
+   case PIPE_BLENDFACTOR_ZERO:
+      return BLENDFACTOR_ZERO;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+      return BLENDFACTOR_INV_SRC_COLOR;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      return BLENDFACTOR_INV_SRC_ALPHA;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      return BLENDFACTOR_INV_DST_ALPHA;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+      return BLENDFACTOR_INV_DST_COLOR;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+      return BLENDFACTOR_INV_CONST_COLOR;
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      return BLENDFACTOR_INV_CONST_ALPHA;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+      return BLENDFACTOR_INV_SRC1_COLOR;
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      return BLENDFACTOR_INV_SRC1_ALPHA;
+   default:
+      assert(0 && "Invalid blend factor");
+      return BLENDFACTOR_ONE;
+   }
+}
+
+static INLINE enum SWR_SURFACE_TYPE
+swr_convert_target_type(const enum pipe_texture_target target)
+{
+   switch (target) {
+   case PIPE_BUFFER:
+      return SURFACE_BUFFER;
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+      return SURFACE_1D;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_RECT:
+      return SURFACE_2D;
+   case PIPE_TEXTURE_3D:
+      return SURFACE_3D;
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      return SURFACE_CUBE;
+   default:
+      assert(0);
+      return SURFACE_NULL;
+   }
+}
+#endif
diff --git a/src/gallium/drivers/swr/swr_tex_sample.cpp b/src/gallium/drivers/swr/swr_tex_sample.cpp
new file mode 100644 (file)
index 0000000..8e01e32
--- /dev/null
@@ -0,0 +1,338 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Largely a copy of llvmpipe's lp_tex_sample.c
+ */
+
+/**
+ * Texture sampling code generation
+ *
+ * This file is nothing more than ugly glue between three largely independent
+ * entities:
+ * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa)
+ * - texture sampling code generation (i.e., lp_build_sample_soa)
+ * - SWR driver
+ *
+ * All interesting code is in the functions mentioned above. There is really
+ * nothing to see here.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "state.h"
+#include "JitManager.h"
+#include "state_llvm.h"
+
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_sample.h"
+#include "gallivm/lp_bld_tgsi.h"
+#include "util/u_memory.h"
+
+#include "swr_tex_sample.h"
+#include "swr_context_llvm.h"
+
+
+/**
+ * This provides the bridge between the texture and sampler state that
+ * the accessor functions in this file read out of the draw context and
+ * the sampler code generator.  The accessors locate that state with the
+ * swr_draw_context_* and swr_jit_texture_* / swr_jit_sampler_* member
+ * indices, and are installed as function pointers in 'base' by
+ * swr_sampler_soa_create().
+ *
+ * 'static_state' is the pointer passed to swr_sampler_soa_create(); the
+ * emit callbacks index it by texture unit and by sampler unit.
+ *
+ * NOTE(review): this comment used to name lp_jit_context and
+ * lp_jit_texture (inherited from llvmpipe); the structures behind the
+ * swr_* indices are declared outside this file - confirm their names.
+ */
+struct swr_sampler_dynamic_state {
+   struct lp_sampler_dynamic_state base;
+
+   const struct swr_sampler_static_state *static_state;
+};
+
+
+/**
+ * This is the bridge between our sampler and the TGSI translator.
+ *
+ * 'base' has to stay the first member: swr_sampler_soa_create() hands
+ * out the address of 'base', the emit callbacks cast that pointer back
+ * to struct swr_sampler_soa, and the destroy callback frees it.
+ */
+struct swr_sampler_soa {
+   struct lp_build_sampler_soa base;
+
+   struct swr_sampler_dynamic_state dynamic_state;
+};
+
+
+/**
+ * Emit the LLVM IR that addresses, and optionally loads, one member of a
+ * texture slot inside the draw context: the element selected by
+ * swr_draw_context_texturesFS, then texture_unit, then member_index.
+ *
+ * \param base          unused here; present so the generated accessors
+ *                      can forward their arguments unchanged.
+ * \param gallivm       supplies the IR builder.
+ * \param context_ptr   IR value pointing at the draw context.
+ * \param texture_unit  slot index, asserted to be below
+ *                      PIPE_MAX_SHADER_SAMPLER_VIEWS.
+ * \param member_index  index of the wanted member within the slot.
+ * \param member_name   only used to label the resulting IR value.
+ * \param emit_load     if TRUE, emit the LLVM load instruction to actually
+ *                      fetch the field's value.  Otherwise, just emit the
+ *                      GEP code to address the field.
+ * \return the loaded value, or the member's address when emit_load is
+ *         FALSE.
+ *
+ * @sa http://llvm.org/docs/GetElementPtr.html
+ */
+static LLVMValueRef
+swr_texture_member(const struct lp_sampler_dynamic_state *base,
+                   struct gallivm_state *gallivm,
+                   LLVMValueRef context_ptr,
+                   unsigned texture_unit,
+                   unsigned member_index,
+                   const char *member_name,
+                   boolean emit_load)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef path[4];
+   LLVMValueRef value;
+
+   assert(texture_unit < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+
+   /* GEP path: context[0] -> .textures -> [unit] -> .member */
+   path[0] = lp_build_const_int32(gallivm, 0);
+   path[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesFS);
+   path[2] = lp_build_const_int32(gallivm, texture_unit);
+   path[3] = lp_build_const_int32(gallivm, member_index);
+
+   value = LLVMBuildGEP(builder, context_ptr, path, Elements(path), "");
+
+   /* Without a load the caller receives the member's address. */
+   if (emit_load)
+      value = LLVMBuildLoad(builder, value, "");
+
+   lp_build_name(value, "context.texture%u.%s", texture_unit, member_name);
+
+   return value;
+}
+
+
+/**
+ * Helper macro that stamps out one accessor, swr_texture_<member>(), for
+ * each texture member the sampler code generator may request.  Every
+ * accessor forwards to swr_texture_member() with the matching
+ * swr_jit_texture_<member> index and the member name as a string (used
+ * to label the IR value).
+ *
+ * This complexity is the price we have to pay to keep the texture
+ * sampler code generator a reusable module without dependencies on
+ * swr internals.
+ */
+#define SWR_TEXTURE_MEMBER(member, do_load)                                  \
+   static LLVMValueRef                                                       \
+   swr_texture_##member(const struct lp_sampler_dynamic_state *base,         \
+                        struct gallivm_state *gallivm,                       \
+                        LLVMValueRef context_ptr,                            \
+                        unsigned texture_unit)                               \
+   {                                                                         \
+      return swr_texture_member(base, gallivm, context_ptr, texture_unit,    \
+                                swr_jit_texture_##member, #member, do_load); \
+   }
+
+
+/* Accessors that load the member and return its value. */
+SWR_TEXTURE_MEMBER(width, TRUE)
+SWR_TEXTURE_MEMBER(height, TRUE)
+SWR_TEXTURE_MEMBER(depth, TRUE)
+SWR_TEXTURE_MEMBER(first_level, TRUE)
+SWR_TEXTURE_MEMBER(last_level, TRUE)
+SWR_TEXTURE_MEMBER(base_ptr, TRUE)
+/* Accessors that return the member's address (GEP only, no load). */
+SWR_TEXTURE_MEMBER(row_stride, FALSE)
+SWR_TEXTURE_MEMBER(img_stride, FALSE)
+SWR_TEXTURE_MEMBER(mip_offsets, FALSE)
+
+
+/**
+ * Emit the LLVM IR that addresses, and optionally loads, one member of a
+ * sampler slot inside the draw context: the element selected by
+ * swr_draw_context_samplersFS, then sampler_unit, then member_index.
+ *
+ * \param base          unused here; present so the generated accessors
+ *                      can forward their arguments unchanged.
+ * \param gallivm       supplies the IR builder.
+ * \param context_ptr   IR value pointing at the draw context.
+ * \param sampler_unit  slot index, asserted to be below PIPE_MAX_SAMPLERS.
+ * \param member_index  index of the wanted member within the slot.
+ * \param member_name   only used to label the resulting IR value.
+ * \param emit_load     if TRUE, emit the LLVM load instruction to actually
+ *                      fetch the field's value.  Otherwise, just emit the
+ *                      GEP code to address the field.
+ * \return the loaded value, or the member's address when emit_load is
+ *         FALSE.
+ *
+ * @sa http://llvm.org/docs/GetElementPtr.html
+ */
+static LLVMValueRef
+swr_sampler_member(const struct lp_sampler_dynamic_state *base,
+                   struct gallivm_state *gallivm,
+                   LLVMValueRef context_ptr,
+                   unsigned sampler_unit,
+                   unsigned member_index,
+                   const char *member_name,
+                   boolean emit_load)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef path[4];
+   LLVMValueRef value;
+
+   assert(sampler_unit < PIPE_MAX_SAMPLERS);
+
+   /* GEP path: context[0] -> .samplers -> [unit] -> .member */
+   path[0] = lp_build_const_int32(gallivm, 0);
+   path[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersFS);
+   path[2] = lp_build_const_int32(gallivm, sampler_unit);
+   path[3] = lp_build_const_int32(gallivm, member_index);
+
+   value = LLVMBuildGEP(builder, context_ptr, path, Elements(path), "");
+
+   /* Without a load the caller receives the member's address. */
+   if (emit_load)
+      value = LLVMBuildLoad(builder, value, "");
+
+   lp_build_name(value, "context.sampler%u.%s", sampler_unit, member_name);
+
+   return value;
+}
+
+
+/**
+ * Helper macro that stamps out one accessor, swr_sampler_<member>(), for
+ * each sampler member the sampler code generator may request.  Every
+ * accessor forwards to swr_sampler_member() with the matching
+ * swr_jit_sampler_<member> index and the member name as a string (used
+ * to label the IR value).
+ */
+#define SWR_SAMPLER_MEMBER(member, do_load)                                  \
+   static LLVMValueRef                                                       \
+   swr_sampler_##member(const struct lp_sampler_dynamic_state *base,         \
+                        struct gallivm_state *gallivm,                       \
+                        LLVMValueRef context_ptr,                            \
+                        unsigned sampler_unit)                               \
+   {                                                                         \
+      return swr_sampler_member(base, gallivm, context_ptr, sampler_unit,    \
+                                swr_jit_sampler_##member, #member, do_load); \
+   }
+
+
+/* Accessors that load the member and return its value. */
+SWR_SAMPLER_MEMBER(min_lod, TRUE)
+SWR_SAMPLER_MEMBER(max_lod, TRUE)
+SWR_SAMPLER_MEMBER(lod_bias, TRUE)
+/* Accessor that returns the member's address (GEP only, no load). */
+SWR_SAMPLER_MEMBER(border_color, FALSE)
+
+
+/**
+ * Destroy callback installed by swr_sampler_soa_create().
+ *
+ * 'sampler' is the address of the 'base' member that the create function
+ * returned.  'base' is the first member of struct swr_sampler_soa, so
+ * freeing this pointer releases the whole object.
+ */
+static void
+swr_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{
+   FREE(sampler);
+}
+
+
+/**
+ * Texture-sample callback: emit code that fetches filtered values from a
+ * texture.
+ *
+ * The per-unit static state is picked with params->texture_index (texture
+ * state) and params->sampler_index (sampler state) and passed, together
+ * with our dynamic-state accessors, to lp_build_sample_soa().
+ * The 'texel' member of params returns four vectors corresponding to
+ * R, G, B, A.
+ */
+static void
+swr_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
+                                 struct gallivm_state *gallivm,
+                                 const struct lp_sampler_params *params)
+{
+   struct swr_sampler_soa *swr_sampler = (struct swr_sampler_soa *)base;
+   struct swr_sampler_dynamic_state *dyn = &swr_sampler->dynamic_state;
+   const unsigned tex_unit = params->texture_index;
+   const unsigned samp_unit = params->sampler_index;
+
+   assert(samp_unit < PIPE_MAX_SAMPLERS);
+   assert(tex_unit < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+
+   /* The disabled branch would emit lp_build_sample_nop() instead. */
+#if 0
+   lp_build_sample_nop(gallivm, params->type, params->coords, params->texel);
+#else
+   lp_build_sample_soa(&dyn->static_state[tex_unit].texture_state,
+                       &dyn->static_state[samp_unit].sampler_state,
+                       &dyn->base,
+                       gallivm,
+                       params);
+#endif
+}
+
+/**
+ * Size-query callback: emit code that fetches the texture size.
+ *
+ * Looks up the static texture state for texture_unit (asserted to be
+ * below PIPE_MAX_SHADER_SAMPLER_VIEWS) and forwards it, our dynamic-state
+ * accessors and every other argument unchanged to
+ * lp_build_size_query_soa().
+ */
+static void
+swr_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base,
+                                struct gallivm_state *gallivm,
+                                struct lp_type type,
+                                unsigned texture_unit,
+                                unsigned target,
+                                LLVMValueRef context_ptr,
+                                boolean is_sviewinfo,
+                                enum lp_sampler_lod_property lod_property,
+                                LLVMValueRef explicit_lod, /* optional */
+                                LLVMValueRef *sizes_out)
+{
+   struct swr_sampler_soa *swr_sampler = (struct swr_sampler_soa *)base;
+   struct swr_sampler_dynamic_state *dyn = &swr_sampler->dynamic_state;
+
+   assert(texture_unit < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+
+   lp_build_size_query_soa(gallivm,
+                           &dyn->static_state[texture_unit].texture_state,
+                           &dyn->base,
+                           type,
+                           texture_unit,
+                           target,
+                           context_ptr,
+                           is_sviewinfo,
+                           lod_property,
+                           explicit_lod,
+                           sizes_out);
+}
+
+
+/**
+ * Allocate the sampler object and populate its callback tables.
+ *
+ * \param static_state  per-unit static texture/sampler state; only the
+ *                      pointer is stored, nothing is copied.
+ * \return the address of the object's 'base' member, or NULL if the
+ *         allocation fails.  The object is released by calling its
+ *         destroy callback.
+ */
+struct lp_build_sampler_soa *
+swr_sampler_soa_create(const struct swr_sampler_static_state *static_state)
+{
+   struct swr_sampler_soa *soa = CALLOC_STRUCT(swr_sampler_soa);
+   struct lp_sampler_dynamic_state *dyn;
+
+   if (soa == NULL)
+      return NULL;
+
+   soa->dynamic_state.static_state = static_state;
+
+   /* Callbacks on the lp_build_sampler_soa interface. */
+   soa->base.destroy = swr_sampler_soa_destroy;
+   soa->base.emit_tex_sample = swr_sampler_soa_emit_fetch_texel;
+   soa->base.emit_size_query = swr_sampler_soa_emit_size_query;
+
+   dyn = &soa->dynamic_state.base;
+
+   /* Per-texture accessors. */
+   dyn->width = swr_texture_width;
+   dyn->height = swr_texture_height;
+   dyn->depth = swr_texture_depth;
+   dyn->first_level = swr_texture_first_level;
+   dyn->last_level = swr_texture_last_level;
+   dyn->base_ptr = swr_texture_base_ptr;
+   dyn->row_stride = swr_texture_row_stride;
+   dyn->img_stride = swr_texture_img_stride;
+   dyn->mip_offsets = swr_texture_mip_offsets;
+
+   /* Per-sampler accessors. */
+   dyn->min_lod = swr_sampler_min_lod;
+   dyn->max_lod = swr_sampler_max_lod;
+   dyn->lod_bias = swr_sampler_lod_bias;
+   dyn->border_color = swr_sampler_border_color;
+
+   return &soa->base;
+}
diff --git a/src/gallium/drivers/swr/swr_tex_sample.h b/src/gallium/drivers/swr/swr_tex_sample.h
new file mode 100644 (file)
index 0000000..f5c368c
--- /dev/null
@@ -0,0 +1,47 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#pragma once
+
+#include "gallivm/lp_bld.h"
+
+struct swr_sampler_static_state {
+   /*
+    * Static sampler state and static texture state for one unit, kept
+    * side by side (effectively interleaved) for more sane key handling.
+    * swr_tex_sample.cpp indexes an array of these by sampler unit to
+    * reach sampler_state and by texture unit to reach texture_state.
+    * As a consequence there might be a lot of unused (null) space if the
+    * number of samplers and the number of textures are not the same.
+    */
+   struct lp_static_sampler_state sampler_state;
+   struct lp_static_texture_state texture_state;
+};
+
+/**
+ * Pure-LLVM texture sampling code generator.
+ *
+ * Creates the sampler object used for code generation.  'key' points at
+ * the per-unit static state (the definition in swr_tex_sample.cpp calls
+ * this parameter 'static_state'); the pointer is stored, not copied.
+ * Returns NULL if the allocation fails.  The returned object is released
+ * through its destroy callback.
+ */
+struct lp_build_sampler_soa *
+swr_sampler_soa_create(const struct swr_sampler_static_state *key);