r300: Fix texture coordinate calculation for rectangle textures
author Nicolai Haehnle <nhaehnle@gmail.com>
Sat, 24 Mar 2007 18:08:26 +0000 (19:08 +0100)
committer Nicolai Haehnle <nhaehnle@gmail.com>
Sat, 24 Mar 2007 18:09:44 +0000 (19:09 +0100)
R300 hardware takes texcoords in the range 0..1 even for rectangle
textures. Previously, the necessary texcoord conversion was applied
to the texture coordinate during vertex processing in a render stage.

This is obviously wrong when fragment programs are used, which can
calculate arbitrary coordinates for TEX instructions. Therefore,
we now inject an appropriate MUL instruction before a TEX that
references a rectangle texture.

src/mesa/drivers/dri/r300/r300_context.c
src/mesa/drivers/dri/r300/r300_context.h
src/mesa/drivers/dri/r300/r300_fragprog.c
src/mesa/drivers/dri/r300/r300_render.c
src/mesa/drivers/dri/r300/r300_state.c

index d10a9d87d3115bb4c1e48f0e1b5ca823d8170838..68c8f5b87d0c65f6d54127e8550761172e1da0cd 100644 (file)
@@ -133,7 +133,6 @@ const struct dri_extension stencil_two_side[] = {
 
 extern struct tnl_pipeline_stage _r300_render_stage;
 extern const struct tnl_pipeline_stage _r300_tcl_stage;
-extern const struct tnl_pipeline_stage _r300_texrect_stage;
 
 static const struct tnl_pipeline_stage *r300_pipeline[] = {
 
@@ -164,8 +163,6 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = {
 
        /* Else do them here.
         */
-       /* scale texture rectangle to 0..1. */
-       &_r300_texrect_stage,
        &_r300_render_stage,
        &_tnl_render_stage,     /* FALLBACK  */
        0,
@@ -204,7 +201,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
                                                     "def_max_anisotropy");
 
        //r300->texmicrotile = GL_TRUE;
-       
+
        /* Init default driver functions then plug in our R300-specific functions
         * (the texture functions are especially important)
         */
@@ -213,7 +210,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
        r300InitStateFuncs(&functions);
        r300InitTextureFuncs(&functions);
        r300InitShaderFuncs(&functions);
-       
+
 #ifdef USER_BUFFERS
        radeon_mm_init(r300);
 #endif
@@ -221,7 +218,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
        if (hw_tcl_on) {
                r300_init_vbo_funcs(&functions);
        }
-#endif 
+#endif
        if (!radeonInitContext(&r300->radeon, &functions,
                               glVisual, driContextPriv, sharedContextPrivate)) {
                FREE(r300);
@@ -264,8 +261,8 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
         * texturable memory at once.
         */
 
-       ctx = r300->radeon.glCtx; 
-       
+       ctx = r300->radeon.glCtx;
+
        ctx->Const.MaxTextureImageUnits = driQueryOptioni(&r300->radeon.optionCache,
                                                     "texture_image_units");
        ctx->Const.MaxTextureCoordUnits = driQueryOptioni(&r300->radeon.optionCache,
@@ -283,7 +280,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
        ctx->Const.MinLineWidthAA = 1.0;
        ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
        ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;
-       
+
 #ifdef USER_BUFFERS
        /* Needs further modifications */
 #if 0
@@ -337,10 +334,10 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
        ctx->_MaintainTexEnvProgram = GL_TRUE;
 
        driInitExtensions(ctx, card_extensions, GL_TRUE);
-       
+
        if (driQueryOptionb(&r300->radeon.optionCache, "disable_stencil_two_side") == 0)
                driInitSingleExtension(ctx, stencil_two_side);
-       
+
        if (r300->radeon.glCtx->Mesa_DXTn && !driQueryOptionb (&r300->radeon.optionCache, "disable_s3tc")) {
          _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
          _mesa_enable_extension( ctx, "GL_S3_s3tc" );
@@ -354,7 +351,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
        radeonInitSpanFuncs(ctx);
        r300InitCmdBuf(r300);
        r300InitState(r300);
-       
+
 #ifdef RADEON_VTXFMT_A
        radeon_init_vtxfmt_a(r300);
 #endif
@@ -407,9 +404,9 @@ static void r300FreeGartAllocations(r300ContextPtr r300)
        /* Cannot flush/lock if no context exists. */
        if (in_use)
                r300FlushCmdBuf(r300, __FUNCTION__);
-       
+
        done_age = radeonGetAge((radeonContextPtr)r300);
-       
+
        for (i = r300->rmm->u_last; i > 0; i--) {
                if (r300->rmm->u_list[i].ptr == NULL) {
                        continue;
@@ -421,7 +418,7 @@ static void r300FreeGartAllocations(r300ContextPtr r300)
                }
 
                assert(r300->rmm->u_list[i].h_pending == 0);
-               
+
                tries = 0;
                while(r300->rmm->u_list[i].age > done_age && tries++ < 1000) {
                        usleep(10);
@@ -430,10 +427,10 @@ static void r300FreeGartAllocations(r300ContextPtr r300)
                if (tries >= 1000) {
                        WARN_ONCE("Failed to idle region!");
                }
-               
+
                memfree.region_offset = (char *)r300->rmm->u_list[i].ptr -
                        (char *)r300->radeon.radeonScreen->gartTextures.map;
-               
+
                ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd,
                                DRM_RADEON_FREE, &memfree, sizeof(memfree));
                if (ret) {
@@ -442,7 +439,7 @@ static void r300FreeGartAllocations(r300ContextPtr r300)
                } else {
                        if (i == r300->rmm->u_last)
                                r300->rmm->u_last--;
-                       
+
                        r300->rmm->u_list[i].pending = 0;
                        r300->rmm->u_list[i].ptr = NULL;
                        if (r300->rmm->u_list[i].fb) {
@@ -490,12 +487,12 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
                _tnl_DestroyContext(r300->radeon.glCtx);
                _vbo_DestroyContext(r300->radeon.glCtx);
                _swrast_DestroyContext(r300->radeon.glCtx);
-               
+
                if (r300->dma.current.buf) {
                        r300ReleaseDmaRegion(r300, &r300->dma.current, __FUNCTION__ );
 #ifndef USER_BUFFERS
                        r300FlushCmdBuf(r300, __FUNCTION__);
-#endif  
+#endif
                }
                r300FreeGartAllocations(r300);
                r300DestroyCmdBuf(r300);
index fa0f55446be7caaff5a0f8c9e0ea99186b8a3c37..c8b81d9849755ba0ab13bb647542bbdeb6545615 100644 (file)
@@ -553,6 +553,7 @@ struct r300_stencilbuffer_state {
 #define VSF_MAX_FRAGMENT_TEMPS (14)
 
 #define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
+#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
 
 struct r300_vertex_shader_fragment {
        int length;
index 0d7d1f1af296571e04058d3e6049a9176b3b13e4..68a75ec7f00d1d83e518eb8448c7b2534947d53e 100644 (file)
@@ -929,13 +929,40 @@ static void emit_tex(struct r300_fragment_program *rp,
        COMPILE_STATE;
        GLuint coord = t_src(rp, fpi->SrcReg[0]);
        GLuint dest = undef, rdest = undef;
-       GLuint din = cs->dest_in_node, uin = cs->used_in_node;
+       GLuint din, uin;
        int unit = fpi->TexSrcUnit;
        int hwsrc, hwdest;
+       GLuint tempreg = 0;
 
        /* Resolve source/dest to hardware registers */
-       hwsrc = t_hw_src(rp, coord, GL_TRUE);
        if (opcode != R300_FPITX_OP_KIL) {
+               if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) {
+                       /**
+                        * Hardware uses [0..1]x[0..1] range for rectangle textures
+                        * instead of [0..Width]x[0..Height].
+                        * Add a scaling instruction.
+                        *
+                        * \todo Refactor this once we have proper rewriting/optimization
+                        * support for programs.
+                        */
+                       GLint tokens[6] = { STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, 0, 0 };
+                       int factor_index;
+                       GLuint factorreg;
+
+                       tokens[2] = unit;
+                       factor_index = _mesa_add_state_reference(rp->mesa_program.Base.Parameters, tokens);
+                       factorreg = emit_const4fv(rp,
+                                       rp->mesa_program.Base.Parameters->ParameterValues[factor_index]);
+                       tempreg = keep(get_temp_reg(rp));
+
+                       emit_arith(rp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW,
+                                  coord, factorreg, pfs_zero, 0);
+
+                       hwsrc = t_hw_src(rp, tempreg, GL_TRUE);
+               } else {
+                       hwsrc = t_hw_src(rp, coord, GL_TRUE);
+               }
+
                dest = t_dst(rp, fpi->DstReg);
 
                /* r300 doesn't seem to be able to do TEX->output reg */
@@ -956,8 +983,12 @@ static void emit_tex(struct r300_fragment_program *rp,
        } else {
                hwdest = 0;
                unit = 0;
+               hwsrc = t_hw_src(rp, coord, GL_TRUE);
        }
 
+       din = cs->dest_in_node;
+       uin = cs->used_in_node;
+
        /* Indirection if source has been written in this node, or if the
         * dest has been read/written in this node
         */
@@ -1009,6 +1040,10 @@ static void emit_tex(struct r300_fragment_program *rp,
                           pfs_one, pfs_zero, 0);
                free_temp(rp, dest);
        }
+
+       /* Free temp register */
+       if (tempreg != 0)
+               free_temp(rp, tempreg);
 }
 
 
index 0864558e8dc16e5a344dff6c1c69a517d544242d..63b21b9379abf5056bb47d24e0468441d8196770 100644 (file)
@@ -202,23 +202,23 @@ static void inline fire_EB(r300ContextPtr rmesa, unsigned long addr, int vertex_
        unsigned long t_addr;
        unsigned long magic_1, magic_2;
        GLcontext *ctx;
-       ctx = rmesa->radeon.glCtx; 
-       
+       ctx = rmesa->radeon.glCtx;
+
        assert(elt_size == 2 || elt_size == 4);
-       
+
        if(addr & (elt_size-1)){
                WARN_ONCE("Badly aligned buffer\n");
                return ;
        }
 #ifdef OPTIMIZE_ELTS
        addr_a = 0;
-       
+
        magic_1 = (addr % 32) / 4;
        t_addr = addr & (~0x1d);
        magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1;
-       
+
        check_space(6);
-       
+
        start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0);
        if(elt_size == 4){
                e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
@@ -234,7 +234,7 @@ static void inline fire_EB(r300ContextPtr rmesa, unsigned long addr, int vertex_
                e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2);
                e32(t_addr);
        }
-       
+
        if(elt_size == 4){
                e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */
        } else {
@@ -249,11 +249,11 @@ static void inline fire_EB(r300ContextPtr rmesa, unsigned long addr, int vertex_
 #endif
 #else
        (void)magic_2, (void)magic_1, (void)t_addr;
-       
+
        addr_a = 0;
-       
+
        check_space(6);
-       
+
        start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0);
        if(elt_size == 4){
                e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
@@ -264,14 +264,14 @@ static void inline fire_EB(r300ContextPtr rmesa, unsigned long addr, int vertex_
        start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2);
        e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
        e32(addr /*& 0xffffffe3*/);
-       
+
        if(elt_size == 4){
                e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */
        } else {
                e32((vertex_count+1)/2 /*+ addr_a/4*/); /* Total number of dwords needed? */
        }
        //cp_delay(rmesa, 1);
-#endif 
+#endif
 }
 
 static void r300_render_vb_primitive(r300ContextPtr rmesa,
@@ -303,12 +303,12 @@ static void r300_render_vb_primitive(r300ContextPtr rmesa,
                //e32(rmesa->state.Elts[start]);
                return;
        }
-       
+
        if(num_verts > 65535){ /* not implemented yet */
                WARN_ONCE("Too many elts\n");
                return;
        }
-       
+
        r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts, rmesa->state.VB.elt_size);
        fire_EB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type, rmesa->state.VB.elt_size);
 #endif
@@ -328,7 +328,7 @@ GLboolean r300_run_vb_render(GLcontext *ctx,
        int cmd_written = 0;
        drm_radeon_cmd_header_t *cmd = NULL;
 
-   
+
        if (RADEON_DEBUG & DEBUG_PRIMS)
                fprintf(stderr, "%s\n", __FUNCTION__);
 
@@ -336,26 +336,26 @@ GLboolean r300_run_vb_render(GLcontext *ctx,
                TNLcontext *tnl = TNL_CONTEXT(ctx);
                radeon_vb_to_rvb(rmesa, VB, &tnl->vb);
        }
-       
+
        r300UpdateShaders(rmesa);
        if (r300EmitArrays(ctx))
                return GL_TRUE;
 
        r300UpdateShaderStates(rmesa);
-       
+
        reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
        e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
 
        reg_start(R300_RB3D_ZCACHE_CTLSTAT,0);
        e32(R300_RB3D_ZCACHE_UNKNOWN_03);
-       
+
        r300EmitState(rmesa);
-       
+
        for(i=0; i < VB->PrimitiveCount; i++){
                GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
                GLuint start = VB->Primitive[i].start;
                GLuint length = VB->Primitive[i].count;
-               
+
                r300_render_vb_primitive(rmesa, ctx, start, start + length, prim);
        }
 
@@ -453,11 +453,6 @@ int r300Fallback(GLcontext *ctx)
                /* GL_POINT_SPRITE_NV */
                FALLBACK_IF(ctx->Point.PointSprite);
 
-       /* Fallback for rectangular texture */
-       for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
-               if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_RECT_BIT)
-                       return R300_FALLBACK_TCL;
-
        return R300_FALLBACK_NONE;
 }
 
@@ -494,19 +489,19 @@ static GLboolean r300_run_tcl_render(GLcontext *ctx,
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
        struct r300_vertex_program *vp;
-   
+
        hw_tcl_on=future_hw_tcl_on;
-   
+
        if (RADEON_DEBUG & DEBUG_PRIMS)
                fprintf(stderr, "%s\n", __FUNCTION__);
        if(hw_tcl_on == GL_FALSE)
                return GL_TRUE;
-       
+
        if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
                hw_tcl_on = GL_FALSE;
                return GL_TRUE;
        }
-       
+
        r300UpdateShaders(rmesa);
 
        vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
@@ -520,13 +515,13 @@ static GLboolean r300_run_tcl_render(GLcontext *ctx,
        TNLcontext *tnl = TNL_CONTEXT(ctx);
        struct tnl_cache *cache;
        struct tnl_cache_item *c;
-       
+
        cache = tnl->vp_cache;
        c = cache->items[0xc000cc0e % cache->size];
-       
+
        if(c && c->data == vp)
                vp->native = GL_FALSE;
-       
+
 #endif
 #if 0
        vp->native = GL_FALSE;
@@ -536,7 +531,7 @@ static GLboolean r300_run_tcl_render(GLcontext *ctx,
                return GL_TRUE;
        }
        //r300UpdateShaderStates(rmesa);
-       
+
        return r300_run_vb_render(ctx, stage);
 }
 
@@ -549,107 +544,3 @@ const struct tnl_pipeline_stage _r300_tcl_stage = {
        r300_run_tcl_render     /* run */
 };
 
-/* R300 texture rectangle expects coords in 0..1 range, not 0..dimension
- * as in the extension spec.  Need to translate here.
- *
- * Note that swrast expects 0..dimension, so if a fallback is active,
- * don't do anything.  (Maybe need to configure swrast to match hw)
- */
-struct texrect_stage_data {
-   GLvector4f texcoord[MAX_TEXTURE_UNITS];
-};
-
-#define TEXRECT_STAGE_DATA(stage) ((struct texrect_stage_data *)stage->privatePtr)
-
-
-static GLboolean run_texrect_stage( GLcontext *ctx,
-                                   struct tnl_pipeline_stage *stage )
-{
-   struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage);
-   r300ContextPtr rmesa = R300_CONTEXT(ctx);
-   TNLcontext *tnl = TNL_CONTEXT(ctx);
-   struct vertex_buffer *VB = &tnl->vb;
-   GLuint i;
-
-   if (rmesa->radeon.Fallback)
-      return GL_TRUE;
-
-   for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) {
-      if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_RECT_BIT) {
-        struct gl_texture_object *texObj = ctx->Texture.Unit[i].CurrentRect;
-        struct gl_texture_image *texImage = texObj->Image[0][texObj->BaseLevel];
-        const GLfloat iw = 1.0/texImage->Width;
-        const GLfloat ih = 1.0/texImage->Height;
-        GLfloat *in = (GLfloat *)VB->TexCoordPtr[i]->data;
-        GLint instride = VB->TexCoordPtr[i]->stride;
-        GLfloat (*out)[4] = store->texcoord[i].data;
-        GLint j;
-
-        store->texcoord[i].size = VB->TexCoordPtr[i]->size;
-        for (j = 0 ; j < VB->Count ; j++) {
-           switch (VB->TexCoordPtr[i]->size) {
-           case 4:
-              out[j][3] = in[3];
-           /* fallthrough */
-           case 3:
-              out[j][2] = in[2];
-           /* fallthrough */
-           default:
-              out[j][0] = in[0] * iw;
-              out[j][1] = in[1] * ih;
-           }
-           in = (GLfloat *)((GLubyte *)in + instride);
-        }
-
-        VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i] = &store->texcoord[i];
-      }
-   }
-
-   return GL_TRUE;
-}
-
-
-/* Called the first time stage->run() is invoked.
- */
-static GLboolean alloc_texrect_data( GLcontext *ctx,
-                                    struct tnl_pipeline_stage *stage )
-{
-   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
-   struct texrect_stage_data *store;
-   GLuint i;
-
-   stage->privatePtr = CALLOC(sizeof(*store));
-   store = TEXRECT_STAGE_DATA(stage);
-   if (!store)
-      return GL_FALSE;
-
-   for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++)
-      _mesa_vector4f_alloc( &store->texcoord[i], 0, VB->Size, 32 );
-
-   return GL_TRUE;
-}
-
-static void free_texrect_data( struct tnl_pipeline_stage *stage )
-{
-   struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage);
-   GLuint i;
-
-   if (store) {
-      for (i = 0 ; i < MAX_TEXTURE_UNITS ; i++)
-        if (store->texcoord[i].data)
-           _mesa_vector4f_free( &store->texcoord[i] );
-      FREE( store );
-      stage->privatePtr = NULL;
-   }
-}
-
-const struct tnl_pipeline_stage _r300_texrect_stage =
-{
-   "r300 texrect stage",                       /* name */
-   NULL,
-   alloc_texrect_data,
-   free_texrect_data,
-   NULL,
-   run_texrect_stage
-};
-
index 4fd80e60feeaaf3faa9292d9295baa4578b853bc..17658efdb2f38215955af0ed1870146b9d3f9518 100644 (file)
@@ -1058,23 +1058,41 @@ r300UpdateDrawBuffer(GLcontext *ctx)
 static void r300FetchStateParameter(GLcontext *ctx, const enum state_index state[],
                   GLfloat *value)
 {
-    r300ContextPtr r300 = R300_CONTEXT(ctx);
+       r300ContextPtr r300 = R300_CONTEXT(ctx);
 
-    switch(state[0])
-    {
-    case STATE_INTERNAL:
-       switch(state[1])
-       {
-       case STATE_R300_WINDOW_DIMENSION:
-           value[0] = r300->radeon.dri.drawable->w*0.5f;/* width*0.5 */
-           value[1] = r300->radeon.dri.drawable->h*0.5f;/* height*0.5 */
-           value[2] = 0.5F;                            /* for moving range [-1 1] -> [0 1] */
-           value[3] = 1.0F;                            /* not used */
-           break;
-       default:;
+       switch(state[0]) {
+       case STATE_INTERNAL:
+               switch(state[1]) {
+               case STATE_R300_WINDOW_DIMENSION:
+                       value[0] = r300->radeon.dri.drawable->w*0.5f;/* width*0.5 */
+                       value[1] = r300->radeon.dri.drawable->h*0.5f;/* height*0.5 */
+                       value[2] = 0.5F;                                /* for moving range [-1 1] -> [0 1] */
+                       value[3] = 1.0F;                                /* not used */
+                       break;
+
+               case STATE_R300_TEXRECT_FACTOR: {
+                       struct gl_texture_object* t = ctx->Texture.Unit[state[2]].CurrentRect;
+
+                       if (t && t->Image[0][t->BaseLevel]) {
+                               struct gl_texture_image* image = t->Image[0][t->BaseLevel];
+                               value[0] = 1.0 / image->Width2;
+                               value[1] = 1.0 / image->Height2;
+                       } else {
+                               value[0] = 1.0;
+                               value[1] = 1.0;
+                       }
+                       value[2] = 1.0;
+                       value[3] = 1.0;
+                       break; }
+
+               default:
+                       break;
+               }
+               break;
+
+       default:
+               break;
        }
-    default:;
-    }
 }
 
 /**