#include "radeon_tcl.h"
+#define RADEON_TXFORMAT_A8 RADEON_TXFORMAT_I8
+#define RADEON_TXFORMAT_L8 RADEON_TXFORMAT_I8
#define RADEON_TXFORMAT_AL88 RADEON_TXFORMAT_AI88
#define RADEON_TXFORMAT_YCBCR RADEON_TXFORMAT_YVYU422
#define RADEON_TXFORMAT_YCBCR_REV RADEON_TXFORMAT_VYUY422
+#define RADEON_TXFORMAT_RGB_DXT1 RADEON_TXFORMAT_DXT1
+#define RADEON_TXFORMAT_RGBA_DXT1 RADEON_TXFORMAT_DXT1
+#define RADEON_TXFORMAT_RGBA_DXT3 RADEON_TXFORMAT_DXT23
+#define RADEON_TXFORMAT_RGBA_DXT5 RADEON_TXFORMAT_DXT45
#define _COLOR(f) \
[ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, 0 }
+#define _COLOR_REV(f) \
+ [ MESA_FORMAT_ ## f ## _REV ] = { RADEON_TXFORMAT_ ## f, 0 }
#define _ALPHA(f) \
[ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }
+#define _ALPHA_REV(f) \
+ [ MESA_FORMAT_ ## f ## _REV ] = { RADEON_TXFORMAT_ ## f | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }
#define _YUV(f) \
[ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, RADEON_YUV_TO_RGB }
#define _INVALID(f) \
[ MESA_FORMAT_ ## f ] = { 0xffffffff, 0 }
-#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_YCBCR_REV) \
+#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
&& (tx_table[f].format != 0xffffffff) )
static const struct {
tx_table[] =
{
_ALPHA(RGBA8888),
+ _ALPHA_REV(RGBA8888),
_ALPHA(ARGB8888),
+ _ALPHA_REV(ARGB8888),
_INVALID(RGB888),
_COLOR(RGB565),
+ _COLOR_REV(RGB565),
_ALPHA(ARGB4444),
+ _ALPHA_REV(ARGB4444),
_ALPHA(ARGB1555),
+ _ALPHA_REV(ARGB1555),
_ALPHA(AL88),
- _INVALID(A8),
- _INVALID(L8),
- _COLOR(I8),
+ _ALPHA_REV(AL88),
+ _ALPHA(A8),
+ _COLOR(L8),
+ _ALPHA(I8),
_INVALID(CI8),
_YUV(YCBCR),
_YUV(YCBCR_REV),
+ _INVALID(RGB_FXT1),
+ _INVALID(RGBA_FXT1),
+ _COLOR(RGB_DXT1),
+ _ALPHA(RGBA_DXT1),
+ _ALPHA(RGBA_DXT3),
+ _ALPHA(RGBA_DXT5),
};
#undef _COLOR
{
radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
- GLint curOffset;
- GLint i;
+ GLint curOffset, blitWidth;
+ GLint i, texelBytes;
GLint numLevels;
GLint log2Width, log2Height, log2Depth;
return;
}
+ texelBytes = baseImage->TexFormat->TexelBytes;
/* Compute which mipmap levels we really want to send to the hardware.
*/
* memory organized as a rectangle of width BLIT_WIDTH_BYTES.
*/
curOffset = 0;
+ blitWidth = BLIT_WIDTH_BYTES;
+ t->tile_bits = 0;
+
+ /* figure out if this texture is suitable for tiling. */
+ if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) {
+ if (rmesa->texmicrotile && (baseImage->Height > 1)) {
+ /* allow 32 (bytes) x 1 mip (which will use two times the space
+ the non-tiled version would use) max if base texture is large enough */
+ if ((numLevels == 1) ||
+ (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
+ (baseImage->Width * texelBytes > 64)) ||
+ ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
+ /* R100 has two microtile bits (only the txoffset reg, not the blitter)
+ weird: X2 + OPT: 32bit correct, 16bit completely hosed
+ X2: 32bit correct, 16bit correct
+ OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */
+ t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/;
+ }
+ }
+ if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) {
+ /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not
+ in the case if height is smaller than 16 (not 100% sure), as does the r200,
+ so need to disable macro tiling in that case */
+ if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) {
+ t->tile_bits |= RADEON_TXO_MACRO_TILE;
+ }
+ }
+ }
for (i = 0; i < numLevels; i++) {
const struct gl_texture_image *texImage;
/* find image size in bytes */
if (texImage->IsCompressed) {
- size = texImage->CompressedSize;
+ /* need to calculate the size AFTER padding even though the texture is
+ submitted without padding.
+ Only handle pot textures currently - don't know if npot is even possible,
+ size calculation would certainly need (trivial) adjustments.
+ Align (and later pad) to 32byte, not sure what that 64byte blit width is
+ good for? */
+ if ((t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) == RADEON_TXFORMAT_DXT1) {
+ /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
+ if ((texImage->Width + 3) < 8) /* width one block */
+ size = texImage->CompressedSize * 4;
+ else if ((texImage->Width + 3) < 16)
+ size = texImage->CompressedSize * 2;
+ else size = texImage->CompressedSize;
+ }
+ else /* DXT3/5, 16 bytes per block */
+ if ((texImage->Width + 3) < 8)
+ size = texImage->CompressedSize * 2;
+ else size = texImage->CompressedSize;
}
else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
- size = ((texImage->Width * texImage->TexFormat->TexelBytes + 63)
- & ~63) * texImage->Height;
+ size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
+ }
+ else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
+ /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+ though the actual offset may be different (if texture is less than
+ 32 bytes width) to the untiled case */
+ int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+ size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
+ blitWidth = MAX2(texImage->Width, 64 / texelBytes);
}
else {
- int w = texImage->Width * texImage->TexFormat->TexelBytes;
- if (w < 32)
- w = 32;
- size = w * texImage->Height * texImage->Depth;
+ int w = (texImage->Width * texelBytes + 31) & ~31;
+ size = w * texImage->Height * texImage->Depth;
+ blitWidth = MAX2(texImage->Width, 64 / texelBytes);
}
assert(size > 0);
-
/* Align to 32-byte offset. It is faster to do this unconditionally
* (no branch penalty).
*/
curOffset = (curOffset + 0x1f) & ~0x1f;
- t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
- t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
- t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES);
- t->image[0][i].height = size / t->image[0][i].width;
+ if (texelBytes) {
+ t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
+ t->image[0][i].y = 0;
+ t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
+ t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
+ }
+ else {
+ t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
+ t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
+ t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES);
+ t->image[0][i].height = size / t->image[0][i].width;
+ }
#if 0
/* for debugging only and only applicable to non-rectangle targets */
if (baseImage->IsCompressed)
t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
else
- t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * baseImage->TexFormat->TexelBytes) + 63) & ~(63);
+ t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
t->pp_txpitch -= 32;
t->dirty_state = TEX_ALL;
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
GLuint color_combine, alpha_combine;
+ const GLuint color_combine0 = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO
+ | RADEON_COLOR_ARG_C_CURRENT_COLOR | RADEON_BLEND_CTL_ADD
+ | RADEON_SCALE_1X | RADEON_CLAMP_TX;
+ const GLuint alpha_combine0 = RADEON_ALPHA_ARG_A_ZERO | RADEON_ALPHA_ARG_B_ZERO
+ | RADEON_ALPHA_ARG_C_CURRENT_ALPHA | RADEON_BLEND_CTL_ADD
+ | RADEON_SCALE_1X | RADEON_CLAMP_TX;
+
/* texUnit->_Current can be NULL if and only if the texture unit is
* not actually enabled.
* reduces the amount of special-casing we have to do, alpha-only
* textures being a notable exception.
*/
+ /* Don't cache these results.
+ */
+ rmesa->state.texture.unit[unit].format = 0;
+ rmesa->state.texture.unit[unit].envMode = 0;
+
if ( !texUnit->_ReallyEnabled ) {
- /* Don't cache these results.
- */
- rmesa->state.texture.unit[unit].format = 0;
- rmesa->state.texture.unit[unit].envMode = 0;
- color_combine = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO
- | RADEON_COLOR_ARG_C_CURRENT_COLOR | RADEON_BLEND_CTL_ADD
- | RADEON_SCALE_1X | RADEON_CLAMP_TX;
- alpha_combine = RADEON_ALPHA_ARG_A_ZERO | RADEON_ALPHA_ARG_B_ZERO
- | RADEON_ALPHA_ARG_C_CURRENT_ALPHA | RADEON_BLEND_CTL_ADD
- | RADEON_SCALE_1X | RADEON_CLAMP_TX;
+ color_combine = color_combine0;
+ alpha_combine = alpha_combine0;
}
else {
GLuint color_arg[3], alpha_arg[3];
GLuint RGBshift = texUnit->_CurrentCombine->ScaleShiftRGB;
GLuint Ashift = texUnit->_CurrentCombine->ScaleShiftA;
- /* Don't cache these results.
- */
- rmesa->state.texture.unit[unit].format = 0;
- rmesa->state.texture.unit[unit].envMode = 0;
-
/* Step 1:
* Extract the color and alpha combine function arguments.
*/
for ( i = 0 ; i < numColorArgs ; i++ ) {
- const GLuint op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR;
+ const GLint op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR;
+ const GLuint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
assert(op >= 0);
assert(op <= 3);
- switch ( texUnit->_CurrentCombine->SourceRGB[i] ) {
+ switch ( srcRGBi ) {
case GL_TEXTURE:
color_arg[i] = radeon_texture_color[op][unit];
break;
case GL_ONE:
color_arg[i] = radeon_zero_color[op+1];
break;
+ case GL_TEXTURE0:
+ case GL_TEXTURE1:
+ case GL_TEXTURE2:
+ /* implement ogl 1.4/1.5 core spec here, not specification of
+ * GL_ARB_texture_env_crossbar (which would require disabling blending
+ * instead of undefined results when referencing not enabled texunit) */
+ color_arg[i] = radeon_texture_color[op][srcRGBi - GL_TEXTURE0];
+ break;
default:
return GL_FALSE;
}
}
for ( i = 0 ; i < numAlphaArgs ; i++ ) {
- const GLuint op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA;
+ const GLint op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA;
+ const GLuint srcAi = texUnit->_CurrentCombine->SourceA[i];
assert(op >= 0);
assert(op <= 1);
- switch ( texUnit->_CurrentCombine->SourceA[i] ) {
+ switch ( srcAi ) {
case GL_TEXTURE:
alpha_arg[i] = radeon_texture_alpha[op][unit];
break;
case GL_ONE:
alpha_arg[i] = radeon_zero_alpha[op+1];
break;
+ case GL_TEXTURE0:
+ case GL_TEXTURE1:
+ case GL_TEXTURE2:
+ alpha_arg[i] = radeon_texture_alpha[op][srcAi - GL_TEXTURE0];
+ break;
default:
return GL_FALSE;
}
* 1.3) does.
*/
RGBshift = 0;
- Ashift = 0;
/* FALLTHROUGH */
case GL_DOT3_RGB:
}
RGBshift += 2;
- Ashift = RGBshift;
+ if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT)
+ || (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ) {
+ /* is it necessary to set this or will it be ignored anyway? */
+ Ashift = RGBshift;
+ }
color_combine = (RADEON_COLOR_ARG_C_ZERO |
RADEON_BLEND_CTL_DOT3 |