st/nine: Fix Pointsize in programmable shader
author Axel Davy <axel.davy@ens.fr>
Sun, 6 Mar 2016 14:54:50 +0000 (15:54 +0100)
committer Axel Davy <axel.davy@ens.fr>
Wed, 18 May 2016 21:37:14 +0000 (23:37 +0200)
Signed-off-by: Axel Davy <axel.davy@ens.fr>
src/gallium/state_trackers/nine/nine_shader.c
src/gallium/state_trackers/nine/nine_shader.h
src/gallium/state_trackers/nine/nine_state.c
src/gallium/state_trackers/nine/nine_state.h
src/gallium/state_trackers/nine/pixelshader9.c
src/gallium/state_trackers/nine/pixelshader9.h
src/gallium/state_trackers/nine/vertexshader9.c
src/gallium/state_trackers/nine/vertexshader9.h

index 1a54fc1f037cff0004d6e036e0c01a369c33cc0f..972245f475323d9931c2eae4ea909b86fba93896 100644 (file)
@@ -1149,8 +1149,7 @@ _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
             break;
         case 2:
             if (ureg_dst_is_undef(tx->regs.oPts))
-                tx->regs.oPts =
-                    ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0));
+                tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
             dst = tx->regs.oPts;
             break;
         default:
@@ -2044,8 +2043,10 @@ DECL_SPECIAL(DCL)
             tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
                 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
 
-            if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
+            if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
+                tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
                 tx->regs.oPts = tx->regs.o[sem.reg.idx];
+            }
         }
     } else {
         if (is_input && tx->version.major >= 3) {
@@ -3424,10 +3425,14 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
     if (info->position_t)
         ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
 
-    ureg_END(tx->ureg);
-
-    if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts))
+    if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
+        struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
+        ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
+        ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
         info->point_size = TRUE;
+    }
+
+    ureg_END(tx->ureg);
 
     /* record local constants */
     if (tx->num_lconstf && tx->indirect_const_access) {
index 1fe0c4bd182aef8761c29db52ed7e99a17c56c79..7f2f57f0d343cc5d9e828a8475e46d5889cb7eaa 100644 (file)
@@ -53,6 +53,8 @@ struct nine_shader_info
 
     boolean position_t; /* out, true if VP writes pre-transformed position */
     boolean point_size; /* out, true if VP writes point size */
+    float point_size_min;
+    float point_size_max;
 
     uint32_t sampler_ps1xtypes; /* 2 bits per sampler */
     uint16_t sampler_mask; /* out, which samplers are being used */
@@ -103,11 +105,11 @@ struct nine_shader_variant
 {
     struct nine_shader_variant *next;
     void *cso;
-    uint32_t key;
+    uint64_t key;
 };
 
 static inline void *
-nine_shader_variant_get(struct nine_shader_variant *list, uint32_t key)
+nine_shader_variant_get(struct nine_shader_variant *list, uint64_t key)
 {
     while (list->key != key && list->next)
         list = list->next;
@@ -118,7 +120,7 @@ nine_shader_variant_get(struct nine_shader_variant *list, uint32_t key)
 
 static inline boolean
 nine_shader_variant_add(struct nine_shader_variant *list,
-                        uint32_t key, void *cso)
+                          uint64_t key, void *cso)
 {
     while (list->next) {
         assert(list->key != key);
@@ -143,48 +145,4 @@ nine_shader_variants_free(struct nine_shader_variant *list)
     }
 }
 
-struct nine_shader_variant64
-{
-    struct nine_shader_variant64 *next;
-    void *cso;
-    uint64_t key;
-};
-
-static inline void *
-nine_shader_variant_get64(struct nine_shader_variant64 *list, uint64_t key)
-{
-    while (list->key != key && list->next)
-        list = list->next;
-    if (list->key == key)
-        return list->cso;
-    return NULL;
-}
-
-static inline boolean
-nine_shader_variant_add64(struct nine_shader_variant64 *list,
-                          uint64_t key, void *cso)
-{
-    while (list->next) {
-        assert(list->key != key);
-        list = list->next;
-    }
-    list->next = MALLOC_STRUCT(nine_shader_variant64);
-    if (!list->next)
-        return FALSE;
-    list->next->next = NULL;
-    list->next->key = key;
-    list->next->cso = cso;
-    return TRUE;
-}
-
-static inline void
-nine_shader_variants_free64(struct nine_shader_variant64 *list)
-{
-    while (list->next) {
-        struct nine_shader_variant64 *ptr = list->next;
-        list->next = ptr->next;
-        FREE(ptr);
-    }
-}
-
 #endif /* _NINE_SHADER_H_ */
index 2172a63d2c303d0aabfd4bef0675c7621cf42ad3..5013b582df7275d62b82a2945d012e02abc84db4 100644 (file)
@@ -894,7 +894,8 @@ commit_ps(struct NineDevice9 *device)
 #define NINE_STATE_SHADER_CHANGE_VS \
    (NINE_STATE_VS |         \
     NINE_STATE_TEXTURE |    \
-    NINE_STATE_FOG_SHADER)
+    NINE_STATE_FOG_SHADER | \
+    NINE_STATE_POINTSIZE_SHADER)
 
 #define NINE_STATE_SHADER_CHANGE_PS \
    (NINE_STATE_PS |         \
@@ -1524,7 +1525,7 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] =
     [D3DRS_VERTEXBLEND] = NINE_STATE_FF_OTHER,
     [D3DRS_CLIPPLANEENABLE] = NINE_STATE_RASTERIZER,
     [D3DRS_POINTSIZE] = NINE_STATE_RASTERIZER,
-    [D3DRS_POINTSIZE_MIN] = NINE_STATE_RASTERIZER,
+    [D3DRS_POINTSIZE_MIN] = NINE_STATE_RASTERIZER | NINE_STATE_POINTSIZE_SHADER,
     [D3DRS_POINTSPRITEENABLE] = NINE_STATE_RASTERIZER,
     [D3DRS_POINTSCALEENABLE] = NINE_STATE_FF_OTHER,
     [D3DRS_POINTSCALE_A] = NINE_STATE_FF_OTHER,
@@ -1534,7 +1535,7 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] =
     [D3DRS_MULTISAMPLEMASK] = NINE_STATE_SAMPLE_MASK,
     [D3DRS_PATCHEDGESTYLE] = NINE_STATE_UNHANDLED,
     [D3DRS_DEBUGMONITORTOKEN] = NINE_STATE_UNHANDLED,
-    [D3DRS_POINTSIZE_MAX] = NINE_STATE_RASTERIZER,
+    [D3DRS_POINTSIZE_MAX] = NINE_STATE_RASTERIZER | NINE_STATE_POINTSIZE_SHADER,
     [D3DRS_INDEXEDVERTEXBLENDENABLE] = NINE_STATE_FF_OTHER,
     [D3DRS_COLORWRITEENABLE] = NINE_STATE_BLEND,
     [D3DRS_TWEENFACTOR] = NINE_STATE_FF_OTHER,
index a4ec4e3b63acd1c31b0b7a4b0e1f1477eab69b67..91bf0cdb6e9625d5901fa3fa57787687ebff136f 100644 (file)
@@ -77,8 +77,9 @@
 #define NINE_STATE_FF_OTHER    (1 << 24)
 #define NINE_STATE_FOG_SHADER  (1 << 25)
 #define NINE_STATE_PS1X_SHADER (1 << 26)
-#define NINE_STATE_ALL          0x7ffffff
-#define NINE_STATE_UNHANDLED   (1 << 27)
+#define NINE_STATE_POINTSIZE_SHADER (1 << 27)
+#define NINE_STATE_ALL          0xfffffff
+#define NINE_STATE_UNHANDLED   (1 << 28)
 
 #define NINE_STATE_COMMIT_DSA  (1 << 0)
 #define NINE_STATE_COMMIT_RASTERIZER (1 << 1)
index 541919c130cce7c2fcdca91d43d4840012b17e1b..8bf4f4bee275416a468399d0b58fb00b693788ab 100644 (file)
@@ -92,7 +92,7 @@ NinePixelShader9_dtor( struct NinePixelShader9 *This )
 
     if (This->base.device) {
         struct pipe_context *pipe = This->base.device->pipe;
-        struct nine_shader_variant64 *var = &This->variant;
+        struct nine_shader_variant *var = &This->variant;
 
         do {
             if (var->cso) {
@@ -109,7 +109,7 @@ NinePixelShader9_dtor( struct NinePixelShader9 *This )
             pipe->delete_fs_state(pipe, This->ff_cso);
         }
     }
-    nine_shader_variants_free64(&This->variant);
+    nine_shader_variants_free(&This->variant);
 
     FREE((void *)This->byte_code.tokens); /* const_cast */
 
@@ -146,7 +146,7 @@ NinePixelShader9_GetVariant( struct NinePixelShader9 *This )
     if (key == This->last_key)
         return This->last_cso;
 
-    cso = nine_shader_variant_get64(&This->variant, key);
+    cso = nine_shader_variant_get(&This->variant, key);
     if (!cso) {
         struct NineDevice9 *device = This->base.device;
         struct nine_shader_info info;
@@ -166,7 +166,7 @@ NinePixelShader9_GetVariant( struct NinePixelShader9 *This )
         hr = nine_translate_shader(This->base.device, &info);
         if (FAILED(hr))
             return NULL;
-        nine_shader_variant_add64(&This->variant, key, info.cso);
+        nine_shader_variant_add(&This->variant, key, info.cso);
         cso = info.cso;
     }
 
index 1fef1c47dd111b7e8f3ecd14fdf983679380f96d..9a615b7b205f009f01b7f06345235eeca93f316d 100644 (file)
@@ -35,7 +35,7 @@ struct nine_lconstf;
 struct NinePixelShader9
 {
     struct NineUnknown base;
-    struct nine_shader_variant64 variant;
+    struct nine_shader_variant variant;
 
     struct {
         const DWORD *tokens;
index eb426ca67d741f81eb035ea2b1c61811e886a229..31135758ff1400e68a22c396eb5e0162d07e4b85 100644 (file)
@@ -61,6 +61,8 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This,
     info.sampler_mask_shadow = 0x0;
     info.sampler_ps1xtypes = 0x0;
     info.fog_enable = 0;
+    info.point_size_min = 0;
+    info.point_size_max = 0;
 
     hr = nine_translate_shader(device, &info);
     if (FAILED(hr))
@@ -145,7 +147,7 @@ void *
 NineVertexShader9_GetVariant( struct NineVertexShader9 *This )
 {
     void *cso;
-    uint32_t key;
+    uint64_t key;
 
     key = This->next_key;
     if (key == This->last_key)
@@ -163,6 +165,8 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This )
         info.byte_code = This->byte_code.tokens;
         info.sampler_mask_shadow = key & 0xf;
         info.fog_enable = device->state.rs[D3DRS_FOGENABLE];
+        info.point_size_min = asfloat(device->state.rs[D3DRS_POINTSIZE_MIN]);
+        info.point_size_max = asfloat(device->state.rs[D3DRS_POINTSIZE_MAX]);
 
         hr = nine_translate_shader(This->base.device, &info);
         if (FAILED(hr))
index c89d4a1ee24853e5119736c04823f3fd5cabdf90..3c9db7990a003b5a592170df6eaedecdfea66a56 100644 (file)
 #ifndef _NINE_VERTEXSHADER9_H_
 #define _NINE_VERTEXSHADER9_H_
 
+#include "util/u_half.h"
+
 #include "iunknown.h"
+#include "nine_helpers.h"
 #include "nine_shader.h"
 #include "nine_state.h"
 
@@ -57,10 +60,10 @@ struct NineVertexShader9
     uint64_t ff_key[3];
     void *ff_cso;
 
-    uint32_t last_key;
+    uint64_t last_key;
     void *last_cso;
 
-    uint32_t next_key;
+    uint64_t next_key;
 };
 static inline struct NineVertexShader9 *
 NineVertexShader9( void *data )
@@ -73,7 +76,7 @@ NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs,
                              struct nine_state *state )
 {
     uint8_t samplers_shadow;
-    uint32_t key;
+    uint64_t key;
     BOOL res;
 
     samplers_shadow = (uint8_t)((state->samplers_shadow & NINE_VS_SAMPLERS_MASK) >> NINE_SAMPLER_VS(0));
@@ -81,7 +84,15 @@ NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs,
     key = samplers_shadow;
 
     if (vs->byte_code.version < 0x30)
-        key |= state->rs[D3DRS_FOGENABLE] << 8;
+        key |= (uint32_t) (state->rs[D3DRS_FOGENABLE] << 8);
+
+    /* We want to use a 64 bits key for performance.
+     * Use compressed float16 values for the pointsize min/max in the key.
+     * Shaders do not usually output psize.*/
+    if (vs->point_size) {
+        key |= ((uint64_t)util_float_to_half(asfloat(state->rs[D3DRS_POINTSIZE_MIN]))) << 32;
+        key |= ((uint64_t)util_float_to_half(asfloat(state->rs[D3DRS_POINTSIZE_MAX]))) << 48;
+    }
 
     res = vs->last_key != key;
     if (res)