From 2c326e72664e65166c68b027b26aaf373f3be36d Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Thu, 4 Feb 2010 19:23:09 +0100
Subject: [PATCH] gallium: add point size clamp to implementation limits in
 vertex shader

The point size min/max registers (unused by mesa state tracker) were removed
since most hardware couldn't do much with them. However, we don't want to have
to rely on hw to do point size clamping correctly to implementation
dependent limits, hence have to do that in the vertex shader. This should also
solve a potential problem with (non-AA) points smaller than 1.0 which according
to OGL still have size 1.0.
Note that OGL point rendering is odd, in particular point sprites are rasterized
differently to points. Some hardware might support those different modes, but in
any case the different clamping values used for smooth/multisampled/sprite
enabled points might help a bit for hw which rasterizes points the same as point
sprites.
Also tweak mesa's ff to vertex shader translation so don't have to clamp twice in
case of point attenuation.
---
 src/mesa/main/ffvertex_prog.c            |  2 +-
 src/mesa/shader/prog_statevars.c         | 64 ++++++++++++++++++++++++
 src/mesa/shader/prog_statevars.h         |  2 +
 src/mesa/state_tracker/st_extensions.c   |  5 ++
 src/mesa/state_tracker/st_mesa_to_tgsi.c | 36 +++++++++++++
 5 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index 2d1db29cbfe..867a55242c0 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -1496,7 +1496,7 @@ static void build_texture_transform( struct tnl_program *p )
 static void build_atten_pointsize( struct tnl_program *p )
 {
    struct ureg eye = get_eye_position_z(p);
-   struct ureg state_size = register_param1(p, STATE_POINT_SIZE);
+   struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED);
    struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
    struct ureg ut = get_temp(p);
diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c
index 058d4bbafb7..460ecd44a84 100644
--- a/src/mesa/shader/prog_statevars.c
+++ b/src/mesa/shader/prog_statevars.c
@@ -444,6 +444,61 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[],
          value[3] = (GLfloat)(ctx->Fog.Density * ONE_DIV_SQRT_LN2);
          return;
 
+      case STATE_POINT_SIZE_CLAMPED:
+         {
+           /* this includes implementation dependent limits, to avoid
+            * another potentially necessary clamp.
+            * Note: for sprites, point smooth (point AA) is ignored
+            * and we'll clamp to MinPointSizeAA and MaxPointSize, because we
+            * expect drivers will want to say their minimum for AA size is 0.0
+            * but for non-AA it's 1.0 (because normal points with size below 1.0
+            * need to get rounded up to 1.0, hence never disappear). GL does
+            * not specify max clamp size for sprites, other than it needs to be
+            * at least as large as max AA size, hence use non-AA size there.
+            */
+            GLfloat minImplSize;
+            GLfloat maxImplSize;
+            if (ctx->Point.PointSprite) {
+               minImplSize = ctx->Const.MinPointSizeAA;
+               maxImplSize = ctx->Const.MaxPointSize;
+            }
+            else if (ctx->Point.SmoothFlag || ctx->Multisample._Enabled) {
+               minImplSize = ctx->Const.MinPointSizeAA;
+               maxImplSize = ctx->Const.MaxPointSizeAA;
+            }
+            else {
+               minImplSize = ctx->Const.MinPointSize;
+               maxImplSize = ctx->Const.MaxPointSize;
+            }
+            value[0] = ctx->Point.Size;
+            value[1] = ctx->Point.MinSize >= minImplSize ? ctx->Point.MinSize : minImplSize;
+            value[2] = ctx->Point.MaxSize <= maxImplSize ? ctx->Point.MaxSize : maxImplSize;
+            value[3] = ctx->Point.Threshold;
+         }
+         return;
+      case STATE_POINT_SIZE_IMPL_CLAMP:
+         {
+           /* for implementation clamp only in vs */
+            GLfloat minImplSize;
+            GLfloat maxImplSize;
+            if (ctx->Point.PointSprite) {
+               minImplSize = ctx->Const.MinPointSizeAA;
+               maxImplSize = ctx->Const.MaxPointSize;
+            }
+            else if (ctx->Point.SmoothFlag || ctx->Multisample._Enabled) {
+               minImplSize = ctx->Const.MinPointSizeAA;
+               maxImplSize = ctx->Const.MaxPointSizeAA;
+            }
+            else {
+               minImplSize = ctx->Const.MinPointSize;
+               maxImplSize = ctx->Const.MaxPointSize;
+            }
+            value[0] = ctx->Point.Size;
+            value[1] = minImplSize;
+            value[2] = maxImplSize;
+            value[3] = ctx->Point.Threshold;
+         }
+         return;
       case STATE_LIGHT_SPOT_DIR_NORMALIZED:
          {
             /* here, state[2] is the light number */
@@ -639,6 +694,9 @@ _mesa_program_state_flags(const gl_state_index state[STATE_LENGTH])
 	 return _NEW_TEXTURE;
       case STATE_FOG_PARAMS_OPTIMIZED:
 	 return _NEW_FOG;
+      case STATE_POINT_SIZE_CLAMPED:
+      case STATE_POINT_SIZE_IMPL_CLAMP:
+         return _NEW_POINT | _NEW_MULTISAMPLE;
       case STATE_LIGHT_SPOT_DIR_NORMALIZED:
       case STATE_LIGHT_POSITION:
       case STATE_LIGHT_POSITION_NORMALIZED:
@@ -830,6 +888,12 @@ append_token(char *dst, gl_state_index k)
    case STATE_FOG_PARAMS_OPTIMIZED:
       append(dst, "fogParamsOptimized");
       break;
+   case STATE_POINT_SIZE_CLAMPED:
+      append(dst, "pointSizeClamped");
+      break;
+   case STATE_POINT_SIZE_IMPL_CLAMP:
+      append(dst, "pointSizeImplClamp");
+      break;
    case STATE_LIGHT_SPOT_DIR_NORMALIZED:
       append(dst, "lightSpotDirNormalized");
       break;
diff --git a/src/mesa/shader/prog_statevars.h b/src/mesa/shader/prog_statevars.h
index 1180d9eaa4a..1753471ffb6 100644
--- a/src/mesa/shader/prog_statevars.h
+++ b/src/mesa/shader/prog_statevars.h
@@ -108,6 +108,8 @@ typedef enum gl_state_index_ {
    STATE_NORMAL_SCALE,
    STATE_TEXRECT_SCALE,
    STATE_FOG_PARAMS_OPTIMIZED,  /* for faster fog calc */
+   STATE_POINT_SIZE_CLAMPED,    /* includes implementation dependent size clamp */
+   STATE_POINT_SIZE_IMPL_CLAMP, /* for implementation clamp only in vs */
    STATE_LIGHT_SPOT_DIR_NORMALIZED,   /* pre-normalized spot dir */
    STATE_LIGHT_POSITION,              /* object vs eye space */
    STATE_LIGHT_POSITION_NORMALIZED,   /* object vs eye space */
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 35e08749df7..7684ccd702b 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -114,6 +114,11 @@ void st_init_limits(struct st_context *st)
       = _maxf(1.0f, screen->get_paramf(screen, PIPE_CAP_MAX_POINT_WIDTH));
    c->MaxPointSizeAA
       = _maxf(1.0f, screen->get_paramf(screen, PIPE_CAP_MAX_POINT_WIDTH_AA));
+   /* these are not queryable. Note that GL basically mandates a 1.0 minimum
+    * for non-aa sizes, but we can go down to 0.0 for aa points.
+    */
+   c->MinPointSize = 1.0f;
+   c->MinPointSizeAA = 0.0f;
 
    c->MaxTextureMaxAnisotropy
       = _maxf(2.0f, screen->get_paramf(screen, PIPE_CAP_MAX_TEXTURE_ANISOTROPY));
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index e788008dfe1..4830a8f383a 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -57,6 +57,10 @@ struct st_translate {
    struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
    struct ureg_dst address[1];
    struct ureg_src samplers[PIPE_MAX_SAMPLERS];
+   struct ureg_dst psizregreal;
+   struct ureg_src pointSizeConst;
+   GLint psizoutindex;
+   GLboolean prevInstWrotePsiz;
 
    const GLuint *inputMapping;
    const GLuint *outputMapping;
@@ -145,6 +149,8 @@ dst_register( struct st_translate *t,
       return t->temps[index];
 
    case PROGRAM_OUTPUT:
+      if (index == t->psizoutindex)
+         t->prevInstWrotePsiz = GL_TRUE;
       return t->outputs[t->outputMapping[index]];
 
    case PROGRAM_ADDRESS:
@@ -787,6 +793,8 @@ st_translate_mesa_program(
    t->inputMapping = inputMapping;
    t->outputMapping = outputMapping;
    t->ureg = ureg;
+   t->psizoutindex = -1;
+   t->prevInstWrotePsiz = GL_FALSE;
 
    /*_mesa_print_program(program);*/
 
@@ -845,6 +853,21 @@ st_translate_mesa_program(
          t->outputs[i] = ureg_DECL_output( ureg,
                                            outputSemanticName[i],
                                            outputSemanticIndex[i] );
+         if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) {
+            static const gl_state_index pointSizeClampState[STATE_LENGTH]
+               = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 };
+               /* XXX: note we are modifying the incoming shader here!  Need to
+               * do this before emitting the constant decls below, or this
+               * will be missed:
+               */
+            unsigned pointSizeClampConst = _mesa_add_state_reference(program->Parameters,
+                                                                     pointSizeClampState);
+            struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
+            t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
+            t->psizregreal = t->outputs[i];
+            t->psizoutindex = i;
+            t->outputs[i] = psizregtemp;
+         }
       }
       if (passthrough_edgeflags)
          emit_edgeflags( t, program );
@@ -910,6 +933,19 @@ st_translate_mesa_program(
    for (i = 0; i < program->NumInstructions; i++) {
       set_insn_start( t, ureg_get_instruction_number( ureg ));
       compile_instruction( t, &program->Instructions[i] );
+
+      /* note can't do that easily at the end of prog due to
+         possible early return */
+      if (t->prevInstWrotePsiz && program->Id) {
+         set_insn_start( t, ureg_get_instruction_number( ureg ));
+         ureg_MAX( t->ureg, ureg_writemask(t->outputs[t->psizoutindex], WRITEMASK_X),
+                   ureg_src(t->outputs[t->psizoutindex]),
+                   ureg_swizzle(t->pointSizeConst, 1,1,1,1));
+         ureg_MIN( t->ureg, ureg_writemask(t->psizregreal, WRITEMASK_X),
+                   ureg_src(t->outputs[t->psizoutindex]),
+                   ureg_swizzle(t->pointSizeConst, 2,2,2,2));
+      }
+      t->prevInstWrotePsiz = GL_FALSE;
    }
 
    /* Fix up all emitted labels:
-- 
2.30.2