gallium: change comments to remove 'state tracker'
[mesa.git] / src / gallium / auxiliary / draw / draw_pipe_aaline.c
index 8955762ce27e450eba372617f08b7786b2e172f2..d380b4295d4721e51152f7c3335ed06f392d4df0 100644 (file)
@@ -1,6 +1,6 @@
 /**************************************************************************
- * 
- * Copyright 2007 VMware, Inc.
+ *
+ * Copyright 2007-2018 VMware, Inc.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 /**
- * AA line stage:  AA lines are converted to texture mapped triangles.
+ * AA line stage:  AA lines are converted to triangles (with extra generic)
  *
  * Authors:  Brian Paul
  */
 #include "pipe/p_shader_tokens.h"
 #include "util/u_inlines.h"
 
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
-#include "util/u_sampler.h"
 
 #include "tgsi/tgsi_transform.h"
 #include "tgsi/tgsi_dump.h"
 #include "draw_private.h"
 #include "draw_pipe.h"
 
+#include "nir.h"
+#include "nir/nir_draw_helpers.h"
 
 /** Approx number of new tokens for instructions in aa_transform_inst() */
-#define NUM_NEW_TOKENS 50
-
-
-/**
- * Size for the alpha texture used for antialiasing
- */
-#define TEXTURE_SIZE_LOG2  5   /* 32 x 32 */
-
-/**
- * Max texture level for the alpha texture used for antialiasing
- *
- * Don't use the 1x1 and 2x2 mipmap levels.
- */
-#define MAX_TEXTURE_LEVEL  (TEXTURE_SIZE_LOG2 - 2)
+#define NUM_NEW_TOKENS 53
 
 
 /**
@@ -75,8 +63,7 @@ struct aaline_fragment_shader
    struct pipe_shader_state state;
    void *driver_fs;
    void *aaline_fs;
-   uint sampler_unit;
-   int generic_attrib;  /**< texcoord/generic used for texture */
+   int generic_attrib;  /**< generic used for distance */
 };
 
 
@@ -89,26 +76,16 @@ struct aaline_stage
 
    float half_line_width;
 
-   /** For AA lines, this is the vertex attrib slot for the new texcoords */
-   uint tex_slot;
+   /** For AA lines, this is the vertex attrib slot for new generic */
+   uint coord_slot;
    /** position, not necessarily output zero */
    uint pos_slot;
 
-   void *sampler_cso;
-   struct pipe_resource *texture;
-   struct pipe_sampler_view *sampler_view;
-   uint num_samplers;
-   uint num_sampler_views;
-
 
    /*
     * Currently bound state
     */
    struct aaline_fragment_shader *fs;
-   struct {
-      void *sampler[PIPE_MAX_SAMPLERS];
-      struct pipe_sampler_view *sampler_views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
-   } state;
 
    /*
     * Driver interface/override functions
@@ -117,13 +94,6 @@ struct aaline_stage
                                     const struct pipe_shader_state *);
    void (*driver_bind_fs_state)(struct pipe_context *, void *);
    void (*driver_delete_fs_state)(struct pipe_context *, void *);
-
-   void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, unsigned,
-                                      unsigned, void **);
-
-   void (*driver_set_sampler_views)(struct pipe_context *, unsigned shader,
-                                    unsigned start, unsigned count,
-                                    struct pipe_sampler_view **);
 };
 
 
@@ -134,38 +104,27 @@ struct aaline_stage
  */
 struct aa_transform_context {
    struct tgsi_transform_context base;
-   uint tempsUsed;  /**< bitmask */
+   uint64_t tempsUsed;  /**< bitmask */
    int colorOutput; /**< which output is the primary color */
-   uint samplersUsed;  /**< bitfield of samplers used */
-   int freeSampler;  /** an available sampler for the pstipple */
    int maxInput, maxGeneric;  /**< max input index found */
-   int colorTemp, texTemp;  /**< temp registers */
-   boolean firstInstruction;
+   int colorTemp, aaTemp;  /**< temp registers */
 };
 
-
 /**
  * TGSI declaration transform callback.
- * Look for a free sampler, a free input attrib, and two free temp regs.
+ * Look for a free input attrib, and two free temp regs.
  */
 static void
 aa_transform_decl(struct tgsi_transform_context *ctx,
                   struct tgsi_full_declaration *decl)
 {
-   struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
+   struct aa_transform_context *aactx = (struct aa_transform_context *)ctx;
 
    if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
        decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
        decl->Semantic.Index == 0) {
       aactx->colorOutput = decl->Range.First;
    }
-   else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
-      uint i;
-      for (i = decl->Range.First;
-           i <= decl->Range.Last; i++) {
-         aactx->samplersUsed |= 1 << i;
-      }
-   }
    else if (decl->Declaration.File == TGSI_FILE_INPUT) {
       if ((int) decl->Range.Last > aactx->maxInput)
          aactx->maxInput = decl->Range.Last;
@@ -178,7 +137,10 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
       uint i;
       for (i = decl->Range.First;
            i <= decl->Range.Last; i++) {
-         aactx->tempsUsed |= (1 << i);
+         /*
+          * XXX this bitfield doesn't really cut it...
+          */
+         aactx->tempsUsed |= UINT64_C(1) << i;
       }
    }
 
@@ -187,164 +149,127 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
 
 
 /**
- * Find the lowest zero bit in the given word, or -1 if bitfield is all ones.
+ * Find the lowest zero bit, or -1 if bitfield is all ones.
  */
 static int
-free_bit(uint bitfield)
+free_bit(uint64_t bitfield)
 {
-   return ffs(~bitfield) - 1;
+   return ffsll(~bitfield) - 1;
 }
 
 
 /**
- * TGSI instruction transform callback.
- * Replace writes to result.color w/ a temp reg.
- * Upon END instruction, insert texture sampling code for antialiasing.
+ * TGSI transform prolog callback.
  */
 static void
-aa_transform_inst(struct tgsi_transform_context *ctx,
-                  struct tgsi_full_instruction *inst)
+aa_transform_prolog(struct tgsi_transform_context *ctx)
 {
    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
+   uint64_t usedTemps = aactx->tempsUsed;
+
+   /* find two free temp regs */
+   aactx->colorTemp = free_bit(usedTemps);
+   usedTemps |= UINT64_C(1) << aactx->colorTemp;
+   aactx->aaTemp = free_bit(usedTemps);
+   assert(aactx->colorTemp >= 0);
+   assert(aactx->aaTemp >= 0);
+
+   /* declare new generic input/texcoord */
+   tgsi_transform_input_decl(ctx, aactx->maxInput + 1,
+                             TGSI_SEMANTIC_GENERIC, aactx->maxGeneric + 1,
+                             TGSI_INTERPOLATE_LINEAR);
+
+   /* declare new temp regs */
+   tgsi_transform_temp_decl(ctx, aactx->aaTemp);
+   tgsi_transform_temp_decl(ctx, aactx->colorTemp);
+}
 
-   if (aactx->firstInstruction) {
-      /* emit our new declarations before the first instruction */
 
-      struct tgsi_full_declaration decl;
-      uint i;
-
-      /* find free sampler */
-      aactx->freeSampler = free_bit(aactx->samplersUsed);
-      if (aactx->freeSampler >= PIPE_MAX_SAMPLERS)
-         aactx->freeSampler = PIPE_MAX_SAMPLERS - 1;
-
-      /* find two free temp regs */
-      for (i = 0; i < 32; i++) {
-         if ((aactx->tempsUsed & (1 << i)) == 0) {
-            /* found a free temp */
-            if (aactx->colorTemp < 0)
-               aactx->colorTemp  = i;
-            else if (aactx->texTemp < 0)
-               aactx->texTemp  = i;
-            else
-               break;
-         }
-      }
-      assert(aactx->colorTemp >= 0);
-      assert(aactx->texTemp >= 0);
-
-      /* declare new generic input/texcoord */
-      decl = tgsi_default_full_declaration();
-      decl.Declaration.File = TGSI_FILE_INPUT;
-      /* XXX this could be linear... */
-      decl.Declaration.Interpolate = 1;
-      decl.Declaration.Semantic = 1;
-      decl.Semantic.Name = TGSI_SEMANTIC_GENERIC;
-      decl.Semantic.Index = aactx->maxGeneric + 1;
-      decl.Range.First = 
-      decl.Range.Last = aactx->maxInput + 1;
-      decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
-      ctx->emit_declaration(ctx, &decl);
-
-      /* declare new sampler */
-      decl = tgsi_default_full_declaration();
-      decl.Declaration.File = TGSI_FILE_SAMPLER;
-      decl.Range.First = 
-      decl.Range.Last = aactx->freeSampler;
-      ctx->emit_declaration(ctx, &decl);
-
-      /* declare new temp regs */
-      decl = tgsi_default_full_declaration();
-      decl.Declaration.File = TGSI_FILE_TEMPORARY;
-      decl.Range.First = 
-      decl.Range.Last = aactx->texTemp;
-      ctx->emit_declaration(ctx, &decl);
-
-      decl = tgsi_default_full_declaration();
-      decl.Declaration.File = TGSI_FILE_TEMPORARY;
-      decl.Range.First = 
-      decl.Range.Last = aactx->colorTemp;
-      ctx->emit_declaration(ctx, &decl);
-
-      aactx->firstInstruction = FALSE;
-   }
+/**
+ * TGSI transform epilog callback.
+ */
+static void
+aa_transform_epilog(struct tgsi_transform_context *ctx)
+{
+   struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
 
-   if (inst->Instruction.Opcode == TGSI_OPCODE_END &&
-       aactx->colorOutput != -1) {
-      struct tgsi_full_instruction newInst;
-
-      /* TEX */
-      newInst = tgsi_default_full_instruction();
-      newInst.Instruction.Opcode = TGSI_OPCODE_TEX;
-      newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
-      newInst.Dst[0].Register.Index = aactx->texTemp;
-      newInst.Instruction.NumSrcRegs = 2;
-      newInst.Instruction.Texture = TRUE;
-      newInst.Texture.Texture = TGSI_TEXTURE_2D;
-      newInst.Src[0].Register.File = TGSI_FILE_INPUT;
-      newInst.Src[0].Register.Index = aactx->maxInput + 1;
-      newInst.Src[1].Register.File = TGSI_FILE_SAMPLER;
-      newInst.Src[1].Register.Index = aactx->freeSampler;
-
-      ctx->emit_instruction(ctx, &newInst);
+   if (aactx->colorOutput != -1) {
+      struct tgsi_full_instruction inst;
+      /* insert distance-based coverage code for antialiasing. */
+
+      /* saturate(linewidth - fabs(interpx), linelength - fabs(interpz)) */
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Saturate = true;
+      inst.Instruction.Opcode = TGSI_OPCODE_ADD;
+      inst.Instruction.NumDstRegs = 1;
+      tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
+                             aactx->aaTemp, TGSI_WRITEMASK_XZ);
+      inst.Instruction.NumSrcRegs = 2;
+      tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_INPUT, aactx->maxInput + 1,
+                             TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
+                             TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
+      tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_INPUT, aactx->maxInput + 1,
+                             TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y,
+                             TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
+      inst.Src[1].Register.Absolute = true;
+      inst.Src[1].Register.Negate = true;
+      ctx->emit_instruction(ctx, &inst);
+
+      /* MUL width / height alpha */
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,
+                                  TGSI_FILE_TEMPORARY, aactx->aaTemp,
+                                  TGSI_WRITEMASK_W,
+                                  TGSI_FILE_TEMPORARY, aactx->aaTemp,
+                                  TGSI_SWIZZLE_X,
+                                  TGSI_FILE_TEMPORARY, aactx->aaTemp,
+                                  TGSI_SWIZZLE_Z, false);
 
       /* MOV rgb */
-      newInst = tgsi_default_full_instruction();
-      newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
-      newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
-      newInst.Dst[0].Register.Index = aactx->colorOutput;
-      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
-      newInst.Instruction.NumSrcRegs = 1;
-      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].Register.Index = aactx->colorTemp;
-      ctx->emit_instruction(ctx, &newInst);
+      tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                              TGSI_FILE_OUTPUT, aactx->colorOutput,
+                              TGSI_WRITEMASK_XYZ,
+                              TGSI_FILE_TEMPORARY, aactx->colorTemp);
 
       /* MUL alpha */
-      newInst = tgsi_default_full_instruction();
-      newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
-      newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
-      newInst.Dst[0].Register.Index = aactx->colorOutput;
-      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
-      newInst.Instruction.NumSrcRegs = 2;
-      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].Register.Index = aactx->colorTemp;
-      newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[1].Register.Index = aactx->texTemp;
-      ctx->emit_instruction(ctx, &newInst);
-
-      /* END */
-      newInst = tgsi_default_full_instruction();
-      newInst.Instruction.Opcode = TGSI_OPCODE_END;
-      newInst.Instruction.NumDstRegs = 0;
-      newInst.Instruction.NumSrcRegs = 0;
-      ctx->emit_instruction(ctx, &newInst);
+      tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
+                              TGSI_FILE_OUTPUT, aactx->colorOutput,
+                              TGSI_WRITEMASK_W,
+                              TGSI_FILE_TEMPORARY, aactx->colorTemp,
+                              TGSI_FILE_TEMPORARY, aactx->aaTemp, false);
    }
-   else {
-      /* Not an END instruction.
-       * Look for writes to result.color and replace with colorTemp reg.
-       */
-      uint i;
+}
 
-      for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
-         struct tgsi_full_dst_register *dst = &inst->Dst[i];
-         if (dst->Register.File == TGSI_FILE_OUTPUT &&
-             dst->Register.Index == aactx->colorOutput) {
-            dst->Register.File = TGSI_FILE_TEMPORARY;
-            dst->Register.Index = aactx->colorTemp;
-         }
-      }
 
-      ctx->emit_instruction(ctx, inst);
+/**
+ * TGSI instruction transform callback.
+ * Replace writes to result.color w/ a temp reg.
+ */
+static void
+aa_transform_inst(struct tgsi_transform_context *ctx,
+                  struct tgsi_full_instruction *inst)
+{
+   struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
+   uint i;
+
+   /*
+    * Look for writes to result.color and replace with colorTemp reg.
+    */
+   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+      struct tgsi_full_dst_register *dst = &inst->Dst[i];
+      if (dst->Register.File == TGSI_FILE_OUTPUT &&
+          dst->Register.Index == aactx->colorOutput) {
+         dst->Register.File = TGSI_FILE_TEMPORARY;
+         dst->Register.Index = aactx->colorTemp;
+      }
    }
+
+   ctx->emit_instruction(ctx, inst);
 }
 
 
 /**
  * Generate the frag shader we'll use for drawing AA lines.
- * This will be the user's shader plus some texture/modulate instructions.
+ * This will be the user's shader plus some arithmetic instructions.
  */
 static boolean
 generate_aaline_fs(struct aaline_stage *aaline)
@@ -365,8 +290,9 @@ generate_aaline_fs(struct aaline_stage *aaline)
    transform.maxInput = -1;
    transform.maxGeneric = -1;
    transform.colorTemp = -1;
-   transform.texTemp = -1;
-   transform.firstInstruction = TRUE;
+   transform.aaTemp = -1;
+   transform.base.prolog = aa_transform_prolog;
+   transform.base.epilog = aa_transform_epilog;
    transform.base.transform_instruction = aa_transform_inst;
    transform.base.transform_declaration = aa_transform_decl;
 
@@ -381,8 +307,6 @@ generate_aaline_fs(struct aaline_stage *aaline)
    tgsi_dump(aaline_fs.tokens, 0);
 #endif
 
-   aaline->fs->sampler_unit = transform.freeSampler;
-
    aaline->fs->aaline_fs = aaline->driver_create_fs_state(pipe, &aaline_fs);
    if (aaline->fs->aaline_fs == NULL)
       goto fail;
@@ -396,126 +320,31 @@ fail:
    return FALSE;
 }
 
-
-/**
- * Create the texture map we'll use for antialiasing the lines.
- */
 static boolean
-aaline_create_texture(struct aaline_stage *aaline)
+generate_aaline_fs_nir(struct aaline_stage *aaline)
 {
+#ifdef LLVM_AVAILABLE
    struct pipe_context *pipe = aaline->stage.draw->pipe;
-   struct pipe_screen *screen = pipe->screen;
-   struct pipe_resource texTemp;
-   struct pipe_sampler_view viewTempl;
-   uint level;
-
-   memset(&texTemp, 0, sizeof(texTemp));
-   texTemp.target = PIPE_TEXTURE_2D;
-   texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
-   texTemp.last_level = MAX_TEXTURE_LEVEL;
-   texTemp.width0 = 1 << TEXTURE_SIZE_LOG2;
-   texTemp.height0 = 1 << TEXTURE_SIZE_LOG2;
-   texTemp.depth0 = 1;
-   texTemp.array_size = 1;
-   texTemp.bind = PIPE_BIND_SAMPLER_VIEW;
-
-   aaline->texture = screen->resource_create(screen, &texTemp);
-   if (!aaline->texture)
-      return FALSE;
+   const struct pipe_shader_state *orig_fs = &aaline->fs->state;
+   struct pipe_shader_state aaline_fs;
 
-   u_sampler_view_default_template(&viewTempl,
-                                   aaline->texture,
-                                   aaline->texture->format);
-   aaline->sampler_view = pipe->create_sampler_view(pipe,
-                                                    aaline->texture,
-                                                    &viewTempl);
-   if (!aaline->sampler_view) {
+   aaline_fs = *orig_fs; /* copy to init */
+   aaline_fs.ir.nir = nir_shader_clone(NULL, orig_fs->ir.nir);
+   if (!aaline_fs.ir.nir)
       return FALSE;
-   }
 
-   /* Fill in mipmap images.
-    * Basically each level is solid opaque, except for the outermost
-    * texels which are zero.  Special case the 1x1 and 2x2 levels
-    * (though, those levels shouldn't be used - see the max_lod setting).
-    */
-   for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) {
-      struct pipe_transfer *transfer;
-      struct pipe_box box;
-      const uint size = u_minify(aaline->texture->width0, level);
-      ubyte *data;
-      uint i, j;
-
-      assert(aaline->texture->width0 == aaline->texture->height0);
-
-      u_box_origin_2d( size, size, &box );
-
-      /* This texture is new, no need to flush. 
-       */
-      data = pipe->transfer_map(pipe,
-                                aaline->texture,
-                                level,
-                                PIPE_TRANSFER_WRITE,
-                                &box, &transfer);
-
-      if (data == NULL)
-         return FALSE;
-
-      for (i = 0; i < size; i++) {
-         for (j = 0; j < size; j++) {
-            ubyte d;
-            if (size == 1) {
-               d = 255;
-            }
-            else if (size == 2) {
-               d = 200; /* tuneable */
-            }
-            else if (i == 0 || j == 0 || i == size - 1 || j == size - 1) {
-               d = 35;  /* edge texel */
-            }
-            else {
-               d = 255;
-            }
-            data[i * transfer->stride + j] = d;
-         }
-      }
+   nir_lower_aaline_fs(aaline_fs.ir.nir, &aaline->fs->generic_attrib);
+   aaline->fs->aaline_fs = aaline->driver_create_fs_state(pipe, &aaline_fs);
+   if (aaline->fs->aaline_fs == NULL)
+      goto fail;
 
-      /* unmap */
-      pipe->transfer_unmap(pipe, transfer);
-   }
    return TRUE;
-}
-
 
-/**
- * Create the sampler CSO that'll be used for antialiasing.
- * By using a mipmapped texture, we don't have to generate a different
- * texture image for each line size.
- */
-static boolean
-aaline_create_sampler(struct aaline_stage *aaline)
-{
-   struct pipe_sampler_state sampler;
-   struct pipe_context *pipe = aaline->stage.draw->pipe;
-
-   memset(&sampler, 0, sizeof(sampler));
-   sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-   sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-   sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-   sampler.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR;
-   sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
-   sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
-   sampler.normalized_coords = 1;
-   sampler.min_lod = 0.0f;
-   sampler.max_lod = MAX_TEXTURE_LEVEL;
-
-   aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
-   if (aaline->sampler_cso == NULL)
-      return FALSE;
-
-   return TRUE;
+fail:
+#endif
+   return FALSE;
 }
 
-
 /**
  * When we're about to draw our first AA line in a batch, this function is
  * called to tell the driver to bind our modified fragment shader.
@@ -526,9 +355,14 @@ bind_aaline_fragment_shader(struct aaline_stage *aaline)
    struct draw_context *draw = aaline->stage.draw;
    struct pipe_context *pipe = draw->pipe;
 
-   if (!aaline->fs->aaline_fs && 
-       !generate_aaline_fs(aaline))
-      return FALSE;
+   if (!aaline->fs->aaline_fs) {
+      if (aaline->fs->state.type == PIPE_SHADER_IR_NIR) {
+         if (!generate_aaline_fs_nir(aaline))
+            return FALSE;
+      } else
+         if (!generate_aaline_fs(aaline))
+            return FALSE;
+   }
 
    draw->suspend_flushing = TRUE;
    aaline->driver_bind_fs_state(pipe, aaline->fs->aaline_fs);
@@ -539,8 +373,8 @@ bind_aaline_fragment_shader(struct aaline_stage *aaline)
 
 
 
-static INLINE struct aaline_stage *
-aaline_stage( struct draw_stage *stage )
+static inline struct aaline_stage *
+aaline_stage(struct draw_stage *stage)
 {
    return (struct aaline_stage *) stage;
 }
@@ -557,112 +391,105 @@ aaline_line(struct draw_stage *stage, struct prim_header *header)
    const float half_width = aaline->half_line_width;
    struct prim_header tri;
    struct vertex_header *v[8];
-   uint texPos = aaline->tex_slot;
+   uint coordPos = aaline->coord_slot;
    uint posPos = aaline->pos_slot;
    float *pos, *tex;
    float dx = header->v[1]->data[posPos][0] - header->v[0]->data[posPos][0];
    float dy = header->v[1]->data[posPos][1] - header->v[0]->data[posPos][1];
-   double a = atan2(dy, dx);
-   float c_a = (float) cos(a), s_a = (float) sin(a);
+   float a = atan2f(dy, dx);
+   float c_a = cosf(a), s_a = sinf(a);
+   float half_length;
+   float t_l, t_w;
    uint i;
 
-   /* XXX the ends of lines aren't quite perfect yet, but probably passable */
-   dx = 0.5F * half_width;
-   dy = half_width;
+   half_length = 0.5f * sqrtf(dx * dx + dy * dy);
+
+   if (half_length < 0.5f) {
+      /*
+       * The logic we use for "normal" sized segments is incorrect
+       * for very short segments (basically because we only have
+       * one value to interpolate, not a distance to each endpoint).
+       * Therefore, we calculate half_length differently, so that for
+       * original line length (near) 0, we get alpha 0 - otherwise
+       * max alpha would still be 0.5. This also prevents us from
+       * artifacts due to degenerated lines (the endpoints being
+       * identical, which would still receive anywhere from alpha
+       * 0-0.5 otherwise) (at least the pstipple stage may generate
+       * such lines due to float inaccuracies if line length is very
+       * close to an integer).
+       * Might not be fully accurate either (because the "strength" of
+       * the line is going to be determined by how close to the pixel
+       * center those 1 or 2 fragments are) but it's probably the best
+       * we can do.
+       */
+      half_length = 2.0f * half_length;
+   } else {
+      half_length = half_length + 0.5f;
+   }
+
+   t_w = half_width;
+   t_l = 0.5f;
 
    /* allocate/dup new verts */
-   for (i = 0; i < 8; i++) {
-      v[i] = dup_vert(stage, header->v[i/4], i);
+   for (i = 0; i < 4; i++) {
+      v[i] = dup_vert(stage, header->v[i/2], i);
    }
 
    /*
     * Quad strip for line from v0 to v1 (*=endpoints):
     *
-    *  1   3                     5   7
-    *  +---+---------------------+---+
+    *  1                             3
+    *  +-----------------------------+
     *  |                             |
     *  | *v0                     v1* |
     *  |                             |
-    *  +---+---------------------+---+
-    *  0   2                     4   6
+    *  +-----------------------------+
+    *  0                             2
+    */
+
+   /*
+    * We increase line length by 0.5 pixels (at each endpoint),
+    * and calculate the tri endpoints by moving them half-width
+    * distance away perpendicular to the line.
+    * XXX: since we change line endpoints (by 0.5 pixel), should
+    * actually re-interpolate all other values?
     */
 
    /* new verts */
    pos = v[0]->data[posPos];
-   pos[0] += (-dx * c_a -  dy * s_a);
-   pos[1] += (-dx * s_a +  dy * c_a);
+   pos[0] += (-t_l * c_a -  t_w * s_a);
+   pos[1] += (-t_l * s_a +  t_w * c_a);
 
    pos = v[1]->data[posPos];
-   pos[0] += (-dx * c_a - -dy * s_a);
-   pos[1] += (-dx * s_a + -dy * c_a);
+   pos[0] += (-t_l * c_a - -t_w * s_a);
+   pos[1] += (-t_l * s_a + -t_w * c_a);
 
    pos = v[2]->data[posPos];
-   pos[0] += ( dx * c_a -  dy * s_a);
-   pos[1] += ( dx * s_a +  dy * c_a);
+   pos[0] += (t_l * c_a -  t_w * s_a);
+   pos[1] += (t_l * s_a +  t_w * c_a);
 
    pos = v[3]->data[posPos];
-   pos[0] += ( dx * c_a - -dy * s_a);
-   pos[1] += ( dx * s_a + -dy * c_a);
-
-   pos = v[4]->data[posPos];
-   pos[0] += (-dx * c_a -  dy * s_a);
-   pos[1] += (-dx * s_a +  dy * c_a);
-
-   pos = v[5]->data[posPos];
-   pos[0] += (-dx * c_a - -dy * s_a);
-   pos[1] += (-dx * s_a + -dy * c_a);
-
-   pos = v[6]->data[posPos];
-   pos[0] += ( dx * c_a -  dy * s_a);
-   pos[1] += ( dx * s_a +  dy * c_a);
-
-   pos = v[7]->data[posPos];
-   pos[0] += ( dx * c_a - -dy * s_a);
-   pos[1] += ( dx * s_a + -dy * c_a);
+   pos[0] += (t_l * c_a - -t_w * s_a);
+   pos[1] += (t_l * s_a + -t_w * c_a);
 
    /* new texcoords */
-   tex = v[0]->data[texPos];
-   ASSIGN_4V(tex, 0, 0, 0, 1);
-
-   tex = v[1]->data[texPos];
-   ASSIGN_4V(tex, 0, 1, 0, 1);
+   tex = v[0]->data[coordPos];
+   ASSIGN_4V(tex, -half_width, half_width, -half_length, half_length);
 
-   tex = v[2]->data[texPos];
-   ASSIGN_4V(tex, .5, 0, 0, 1);
+   tex = v[1]->data[coordPos];
+   ASSIGN_4V(tex, half_width, half_width, -half_length, half_length);
 
-   tex = v[3]->data[texPos];
-   ASSIGN_4V(tex, .5, 1, 0, 1);
+   tex = v[2]->data[coordPos];
+   ASSIGN_4V(tex, -half_width, half_width, half_length, half_length);
 
-   tex = v[4]->data[texPos];
-   ASSIGN_4V(tex, .5, 0, 0, 1);
+   tex = v[3]->data[coordPos];
+   ASSIGN_4V(tex, half_width, half_width, half_length, half_length);
 
-   tex = v[5]->data[texPos];
-   ASSIGN_4V(tex, .5, 1, 0, 1);
-
-   tex = v[6]->data[texPos];
-   ASSIGN_4V(tex, 1, 0, 0, 1);
-
-   tex = v[7]->data[texPos];
-   ASSIGN_4V(tex, 1, 1, 0, 1);
-
-   /* emit 6 tris for the quad strip */
    tri.v[0] = v[2];  tri.v[1] = v[1];  tri.v[2] = v[0];
-   stage->next->tri( stage->next, &tri );
+   stage->next->tri(stage->next, &tri);
 
    tri.v[0] = v[3];  tri.v[1] = v[1];  tri.v[2] = v[2];
-   stage->next->tri( stage->next, &tri );
-
-   tri.v[0] = v[4];  tri.v[1] = v[3];  tri.v[2] = v[2];
-   stage->next->tri( stage->next, &tri );
-
-   tri.v[0] = v[5];  tri.v[1] = v[3];  tri.v[2] = v[4];
-   stage->next->tri( stage->next, &tri );
-
-   tri.v[0] = v[6];  tri.v[1] = v[5];  tri.v[2] = v[4];
-   stage->next->tri( stage->next, &tri );
-
-   tri.v[0] = v[7];  tri.v[1] = v[5];  tri.v[2] = v[6];
-   stage->next->tri( stage->next, &tri );
+   stage->next->tri(stage->next, &tri);
 }
 
 
@@ -673,18 +500,23 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
    struct draw_context *draw = stage->draw;
    struct pipe_context *pipe = draw->pipe;
    const struct pipe_rasterizer_state *rast = draw->rasterizer;
-   uint num_samplers;
    void *r;
 
-   assert(draw->rasterizer->line_smooth);
+   assert(draw->rasterizer->line_smooth && !draw->rasterizer->multisample);
 
-   if (draw->rasterizer->line_width <= 2.2)
-      aaline->half_line_width = 1.1f;
+   if (draw->rasterizer->line_width <= 1.0)
+      aaline->half_line_width = 1.0;
    else
-      aaline->half_line_width = 0.5f * draw->rasterizer->line_width;
+      aaline->half_line_width = 0.5f * draw->rasterizer->line_width + 0.5f;
+
+   if (!draw->rasterizer->half_pixel_center)
+      /*
+       * The tex coords probably would need adjustments?
+       */
+      debug_printf("aa lines without half pixel center may be wrong\n");
 
    /*
-    * Bind (generate) our fragprog, sampler and texture
+    * Bind (generate) our fragprog
     */
    if (!bind_aaline_fragment_shader(aaline)) {
       stage->line = draw_pipe_passthrough_line;
@@ -694,23 +526,8 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
 
    draw_aaline_prepare_outputs(draw, draw->pipeline.aaline);
 
-   /* how many samplers? */
-   /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
-   num_samplers = MAX2(aaline->num_sampler_views, aaline->num_samplers);
-   num_samplers = MAX2(num_samplers, aaline->fs->sampler_unit + 1);
-
-   aaline->state.sampler[aaline->fs->sampler_unit] = aaline->sampler_cso;
-   pipe_sampler_view_reference(&aaline->state.sampler_views[aaline->fs->sampler_unit],
-                               aaline->sampler_view);
-
    draw->suspend_flushing = TRUE;
 
-   aaline->driver_bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0,
-                                      num_samplers, aaline->state.sampler);
-
-   aaline->driver_set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0,
-                                    num_samplers, aaline->state.sampler_views);
-
    /* Disable triangle culling, stippling, unfilled mode etc. */
    r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
    pipe->bind_rasterizer_state(pipe, r);
@@ -731,20 +548,12 @@ aaline_flush(struct draw_stage *stage, unsigned flags)
    struct pipe_context *pipe = draw->pipe;
 
    stage->line = aaline_first_line;
-   stage->next->flush( stage->next, flags );
+   stage->next->flush(stage->next, flags);
 
-   /* restore original frag shader, texture, sampler state */
+   /* restore original frag shader */
    draw->suspend_flushing = TRUE;
    aaline->driver_bind_fs_state(pipe, aaline->fs ? aaline->fs->driver_fs : NULL);
 
-   aaline->driver_bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, 0,
-                                      aaline->num_samplers,
-                                      aaline->state.sampler);
-
-   aaline->driver_set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0,
-                                    aaline->num_samplers,
-                                    aaline->state.sampler_views);
-
    /* restore original rasterizer state */
    if (draw->rast_handle) {
       pipe->bind_rasterizer_state(pipe, draw->rast_handle);
@@ -759,7 +568,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags)
 static void
 aaline_reset_stipple_counter(struct draw_stage *stage)
 {
-   stage->next->reset_stipple_counter( stage->next );
+   stage->next->reset_stipple_counter(stage->next);
 }
 
 
@@ -768,33 +577,15 @@ aaline_destroy(struct draw_stage *stage)
 {
    struct aaline_stage *aaline = aaline_stage(stage);
    struct pipe_context *pipe = stage->draw->pipe;
-   uint i;
 
-   for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
-      pipe_sampler_view_reference(&aaline->state.sampler_views[i], NULL);
-   }
-
-   if (aaline->sampler_cso)
-      pipe->delete_sampler_state(pipe, aaline->sampler_cso);
-
-   if (aaline->texture)
-      pipe_resource_reference(&aaline->texture, NULL);
-
-   if (aaline->sampler_view) {
-      pipe_sampler_view_reference(&aaline->sampler_view, NULL);
-   }
-
-   draw_free_temp_verts( stage );
+   draw_free_temp_verts(stage);
 
    /* restore the old entry points */
    pipe->create_fs_state = aaline->driver_create_fs_state;
    pipe->bind_fs_state = aaline->driver_bind_fs_state;
    pipe->delete_fs_state = aaline->driver_delete_fs_state;
 
-   pipe->bind_sampler_states = aaline->driver_bind_sampler_states;
-   pipe->set_sampler_views = aaline->driver_set_sampler_views;
-
-   FREE( stage );
+   FREE(stage);
 }
 
 
@@ -802,7 +593,7 @@ static struct aaline_stage *
 draw_aaline_stage(struct draw_context *draw)
 {
    struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage);
-   if (aaline == NULL)
+   if (!aaline)
       return NULL;
 
    aaline->stage.draw = draw;
@@ -815,14 +606,13 @@ draw_aaline_stage(struct draw_context *draw)
    aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter;
    aaline->stage.destroy = aaline_destroy;
 
-   if (!draw_alloc_temp_verts( &aaline->stage, 8 ))
+   if (!draw_alloc_temp_verts(&aaline->stage, 8))
       goto fail;
 
    return aaline;
 
  fail:
-   if (aaline)
-      aaline->stage.destroy(&aaline->stage);
+   aaline->stage.destroy(&aaline->stage);
 
    return NULL;
 }
@@ -843,7 +633,7 @@ aaline_stage_from_pipe(struct pipe_context *pipe)
 
 /**
  * This function overrides the driver's create_fs_state() function and
- * will typically be called by the state tracker.
+ * will typically be called by the gallium frontend.
  */
 static void *
 aaline_create_fs_state(struct pipe_context *pipe,
@@ -852,15 +642,21 @@ aaline_create_fs_state(struct pipe_context *pipe,
    struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
    struct aaline_fragment_shader *aafs = NULL;
 
-   if (aaline == NULL)
+   if (!aaline)
       return NULL;
 
    aafs = CALLOC_STRUCT(aaline_fragment_shader);
 
-   if (aafs == NULL)
+   if (!aafs)
       return NULL;
 
-   aafs->state.tokens = tgsi_dup_tokens(fs->tokens);
+   aafs->state.type = fs->type;
+   if (fs->type == PIPE_SHADER_IR_TGSI)
+      aafs->state.tokens = tgsi_dup_tokens(fs->tokens);
+#ifdef LLVM_AVAILABLE
+   else
+      aafs->state.ir.nir = nir_shader_clone(NULL, fs->ir.nir);
+#endif
 
    /* pass-through */
    aafs->driver_fs = aaline->driver_create_fs_state(pipe, fs);
@@ -875,7 +671,7 @@ aaline_bind_fs_state(struct pipe_context *pipe, void *fs)
    struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
    struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs;
 
-   if (aaline == NULL) {
+   if (!aaline) {
       return;
    }
 
@@ -892,11 +688,11 @@ aaline_delete_fs_state(struct pipe_context *pipe, void *fs)
    struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
    struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs;
 
-   if (aafs == NULL) {
+   if (!aafs) {
       return;
    }
 
-   if (aaline != NULL) {
+   if (aaline) {
       /* pass-through */
       aaline->driver_delete_fs_state(pipe, aafs->driver_fs);
 
@@ -904,60 +700,14 @@ aaline_delete_fs_state(struct pipe_context *pipe, void *fs)
          aaline->driver_delete_fs_state(pipe, aafs->aaline_fs);
    }
 
-   FREE((void*)aafs->state.tokens);
+   if (aafs->state.type == PIPE_SHADER_IR_TGSI)
+      FREE((void*)aafs->state.tokens);
+   else
+      ralloc_free(aafs->state.ir.nir);
    FREE(aafs);
 }
 
 
-static void
-aaline_bind_sampler_states(struct pipe_context *pipe, unsigned shader,
-                           unsigned start, unsigned num, void **sampler)
-{
-   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
-
-   assert(start == 0);
-
-   if (aaline == NULL) {
-      return;
-   }
-
-   if (shader == PIPE_SHADER_FRAGMENT) {
-      /* save current */
-      memcpy(aaline->state.sampler, sampler, num * sizeof(void *));
-      aaline->num_samplers = num;
-   }
-
-   /* pass-through */
-   aaline->driver_bind_sampler_states(pipe, shader, start, num, sampler);
-}
-
-
-static void
-aaline_set_sampler_views(struct pipe_context *pipe, unsigned shader,
-                         unsigned start, unsigned num,
-                         struct pipe_sampler_view **views)
-{
-   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
-   uint i;
-
-   if (aaline == NULL) {
-      return;
-   }
-
-   if (shader == PIPE_SHADER_FRAGMENT) {
-      /* save current */
-      for (i = 0; i < num; i++) {
-         pipe_sampler_view_reference(&aaline->state.sampler_views[start + i],
-                                     views[i]);
-      }
-      aaline->num_sampler_views = num;
-   }
-
-   /* pass-through */
-   aaline->driver_set_sampler_views(pipe, shader, start, num, views);
-}
-
-
 void
 draw_aaline_prepare_outputs(struct draw_context *draw,
                             struct draw_stage *stage)
@@ -966,15 +716,18 @@ draw_aaline_prepare_outputs(struct draw_context *draw,
    const struct pipe_rasterizer_state *rast = draw->rasterizer;
 
    /* update vertex attrib info */
-   aaline->pos_slot = draw_current_shader_position_output(draw);;
+   aaline->pos_slot = draw_current_shader_position_output(draw);
 
-   if (!rast->line_smooth)
+   if (!rast->line_smooth || rast->multisample)
       return;
 
    /* allocate the extra post-transformed vertex attribute */
-   aaline->tex_slot = draw_alloc_extra_vertex_attrib(draw,
-                                                     TGSI_SEMANTIC_GENERIC,
-                                                     aaline->fs->generic_attrib);
+   if (aaline->fs && aaline->fs->aaline_fs)
+      aaline->coord_slot = draw_alloc_extra_vertex_attrib(draw,
+                                                          TGSI_SEMANTIC_GENERIC,
+                                                          aaline->fs->generic_attrib);
+   else
+      aaline->coord_slot = -1;
 }
 
 /**
@@ -992,7 +745,7 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
    /*
     * Create / install AA line drawing / prim stage
     */
-   aaline = draw_aaline_stage( draw );
+   aaline = draw_aaline_stage(draw);
    if (!aaline)
       goto fail;
 
@@ -1001,33 +754,20 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
    aaline->driver_bind_fs_state = pipe->bind_fs_state;
    aaline->driver_delete_fs_state = pipe->delete_fs_state;
 
-   aaline->driver_bind_sampler_states = pipe->bind_sampler_states;
-   aaline->driver_set_sampler_views = pipe->set_sampler_views;
-
-   /* create special texture, sampler state */
-   if (!aaline_create_texture(aaline))
-      goto fail;
-
-   if (!aaline_create_sampler(aaline))
-      goto fail;
-
    /* override the driver's functions */
    pipe->create_fs_state = aaline_create_fs_state;
    pipe->bind_fs_state = aaline_bind_fs_state;
    pipe->delete_fs_state = aaline_delete_fs_state;
 
-   pipe->bind_sampler_states = aaline_bind_sampler_states;
-   pipe->set_sampler_views = aaline_set_sampler_views;
-   
    /* Install once everything is known to be OK:
     */
    draw->pipeline.aaline = &aaline->stage;
 
    return TRUE;
 
- fail:
+fail:
    if (aaline)
-      aaline->stage.destroy( &aaline->stage );
-   
+      aaline->stage.destroy(&aaline->stage);
+
    return FALSE;
 }