Merge master and fix conflicts
authorAlex Deucher <alexdeucher@gmail.com>
Fri, 12 Jun 2009 16:09:34 +0000 (12:09 -0400)
committerAlex Deucher <alexdeucher@gmail.com>
Fri, 12 Jun 2009 16:09:34 +0000 (12:09 -0400)
15 files changed:
1  2 
configure.ac
src/mesa/drivers/dri/r600/r700_assembler.c
src/mesa/drivers/dri/r600/r700_clear.c
src/mesa/drivers/dri/r600/r700_state.c
src/mesa/drivers/dri/radeon/radeon_bo_legacy.c
src/mesa/drivers/dri/radeon/radeon_common.c
src/mesa/drivers/dri/radeon/radeon_common.h
src/mesa/drivers/dri/radeon/radeon_common_context.c
src/mesa/drivers/dri/radeon/radeon_common_context.h
src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
src/mesa/drivers/dri/radeon/radeon_dma.c
src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
src/mesa/drivers/dri/radeon/radeon_screen.c
src/mesa/drivers/dri/radeon/radeon_screen.h
src/mesa/drivers/dri/radeon/radeon_texture.c

diff --cc configure.ac
Simple merge
index 9f1d1a3a44859458b804dc07636148e4787f1d8f,0000000000000000000000000000000000000000..4666518824fe0dc59ebaec5d0fbc1c9244a778f8
mode 100644,000000..100644
--- /dev/null
@@@ -1,4067 -1,0 +1,4067 @@@
-     pAsm->S[fld].src.negx = pILInst->SrcReg[src].NegateBase & 0x1;
-     pAsm->S[fld].src.negy = (pILInst->SrcReg[src].NegateBase >> 1) & 0x1;
-     pAsm->S[fld].src.negz = (pILInst->SrcReg[src].NegateBase >> 2) & 0x1;
-     pAsm->S[fld].src.negw = (pILInst->SrcReg[src].NegateBase >> 3) & 0x1;
 +/*
 + * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice shall be included
 + * in all copies or substantial portions of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 + */
 +
 +/*
 + * Authors:
 + *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
 + */
 +
 +#include <stdio.h>
 +#include <stdarg.h>
 +#include <stdlib.h>
 +#include <string.h>
 +#include <math.h>
 +
 +#include "main/mtypes.h"
 +#include "main/imports.h"
 +
 +#include "r600_context.h"
 +#include "r700_debug.h"
 +
 +#include "r700_assembler.h"
 +
 +BITS addrmode_PVSDST(PVSDST * pPVSDST)
 +{
 +      return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
 +}
 +
 +void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode) 
 +{
 +      pPVSDST->addrmode0 = addrmode & 1;
 +      pPVSDST->addrmode1 = (addrmode >> 1) & 1;
 +}
 +
 +void nomask_PVSDST(PVSDST * pPVSDST) 
 +{
 +      pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
 +}
 +
 +BITS addrmode_PVSSRC(PVSSRC* pPVSSRC) 
 +{
 +      return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
 +}
 +
 +void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode) 
 +{
 +      pPVSSRC->addrmode0 = addrmode & 1;
 +      pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
 +}
 +
 +
 +void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz) 
 +{
 +      pPVSSRC->swizzlex = 
 +      pPVSSRC->swizzley = 
 +      pPVSSRC->swizzlez = 
 +      pPVSSRC->swizzlew = swz;
 +}
 +
 +void noswizzle_PVSSRC(PVSSRC* pPVSSRC) 
 +{
 +      pPVSSRC->swizzlex = SQ_SEL_X;
 +      pPVSSRC->swizzley = SQ_SEL_Y;
 +      pPVSSRC->swizzlez = SQ_SEL_Z;
 +      pPVSSRC->swizzlew = SQ_SEL_W;
 +}
 +
 +void
 +swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
 +{
 +    switch (x) 
 +    {
 +        case SQ_SEL_X: x = pPVSSRC->swizzlex; 
 +            break;
 +        case SQ_SEL_Y: x = pPVSSRC->swizzley; 
 +            break;
 +        case SQ_SEL_Z: x = pPVSSRC->swizzlez; 
 +            break;
 +        case SQ_SEL_W: x = pPVSSRC->swizzlew; 
 +            break;
 +        default:;
 +    }
 +
 +    switch (y) 
 +    {
 +        case SQ_SEL_X: y = pPVSSRC->swizzlex; 
 +            break;
 +        case SQ_SEL_Y: y = pPVSSRC->swizzley; 
 +            break;
 +        case SQ_SEL_Z: y = pPVSSRC->swizzlez; 
 +            break;
 +        case SQ_SEL_W: y = pPVSSRC->swizzlew; 
 +            break;
 +        default:;
 +    }
 +
 +    switch (z) 
 +    {
 +        case SQ_SEL_X: z = pPVSSRC->swizzlex; 
 +            break;
 +        case SQ_SEL_Y: z = pPVSSRC->swizzley; 
 +            break;
 +        case SQ_SEL_Z: z = pPVSSRC->swizzlez; 
 +            break;
 +        case SQ_SEL_W: z = pPVSSRC->swizzlew; 
 +            break;
 +        default:;
 +    }
 +
 +    switch (w) 
 +    {
 +        case SQ_SEL_X: w = pPVSSRC->swizzlex; 
 +            break;
 +        case SQ_SEL_Y: w = pPVSSRC->swizzley; 
 +            break;
 +        case SQ_SEL_Z: w = pPVSSRC->swizzlez; 
 +            break;
 +        case SQ_SEL_W: w = pPVSSRC->swizzlew; 
 +            break;
 +        default:;
 +    }
 +
 +    pPVSSRC->swizzlex = x;
 +    pPVSSRC->swizzley = y;
 +    pPVSSRC->swizzlez = z;
 +    pPVSSRC->swizzlew = w;
 +}
 +
 +void neg_PVSSRC(PVSSRC* pPVSSRC) 
 +{
 +      pPVSSRC->negx = 1;
 +      pPVSSRC->negy = 1;
 +      pPVSSRC->negz = 1;
 +      pPVSSRC->negw = 1;
 +}
 +
 +void noneg_PVSSRC(PVSSRC* pPVSSRC) 
 +{
 +      pPVSSRC->negx = 0;
 +      pPVSSRC->negy = 0;
 +      pPVSSRC->negz = 0;
 +      pPVSSRC->negw = 0;
 +}
 +
 +// negate argument (for SUB instead of ADD and alike)
 +void flipneg_PVSSRC(PVSSRC* pPVSSRC) 
 +{
 +      pPVSSRC->negx = !pPVSSRC->negx;
 +      pPVSSRC->negy = !pPVSSRC->negy;
 +      pPVSSRC->negz = !pPVSSRC->negz;
 +      pPVSSRC->negw = !pPVSSRC->negw;
 +}
 +
 +void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c) 
 +{
 +      switch (c) 
 +      {
 +              case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
 +              case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
 +              case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
 +              case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
 +              default:;
 +      } 
 +}
 +
 +void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c) 
 +{
 +      switch (c) 
 +      {
 +              case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
 +              case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
 +              case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
 +              case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
 +              default:;
 +      } 
 +}
 +
 +BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)  
 +{
 +        return (pOutVTXFmt0->point_size            |
 +                        pOutVTXFmt0->edge_flag             |
 +                        pOutVTXFmt0->rta_index             |
 +                        pOutVTXFmt0->kill_flag             |
 +                        pOutVTXFmt0->viewport_index);
 +}
 +
 +BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) 
 +{
 +        return (pFPOutFmt->depth            | 
 +                        pFPOutFmt->stencil_ref      | 
 +                        pFPOutFmt->mask             | 
 +                        pFPOutFmt->coverage_to_mask);
 +}
 +
 +GLboolean is_reduction_opcode(PVSDWORD* dest)
 +{
 +    if (dest->dst.op3 == 0) 
 +    {
 +        if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE) ) 
 +        {
 +            return GL_TRUE;
 +        }
 +    }
 +    return GL_FALSE;
 +}
 +
 +GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
 +{
 +    GLuint format = FMT_INVALID;
 +      GLuint uiElemSize = 0;
 +
 +    switch (eType)
 +    {
 +        case GL_BYTE:
 +        case GL_UNSIGNED_BYTE:
 +                      uiElemSize = 1;
 +            switch(nChannels)
 +            {
 +                case 1:
 +                    format = FMT_8; break;
 +                case 2:
 +                    format = FMT_8_8; break;
 +                case 3:
 +                    format = FMT_8_8_8; break;
 +                case 4:
 +                    format = FMT_8_8_8_8; break;
 +                default:
 +                    break;
 +            }
 +            break;
 +
 +        case GL_UNSIGNED_SHORT:
 +        case GL_SHORT:
 +                      uiElemSize = 2;
 +            switch(nChannels)
 +            {
 +                case 1:
 +                    format = FMT_16; break;
 +                case 2:
 +                    format = FMT_16_16; break;
 +                case 3:
 +                    format = FMT_16_16_16; break;
 +                case 4:
 +                    format = FMT_16_16_16_16; break;
 +                default:
 +                    break;
 +            }
 +            break;
 +
 +        case GL_UNSIGNED_INT:
 +        case GL_INT:
 +                      uiElemSize = 4;
 +            switch(nChannels)
 +            {
 +                case 1:
 +                    format = FMT_32; break;
 +                case 2:
 +                    format = FMT_32_32; break;
 +                case 3:
 +                    format = FMT_32_32_32; break;
 +                case 4:
 +                    format = FMT_32_32_32_32; break;
 +                default:
 +                    break;
 +            }
 +            break;
 +
 +        case GL_FLOAT:
 +                      uiElemSize = 4;
 +                      switch(nChannels)
 +            {
 +                case 1:
 +                    format = FMT_32_FLOAT; break;
 +                case 2:
 +                    format = FMT_32_32_FLOAT; break;
 +                case 3:
 +                    format = FMT_32_32_32_FLOAT; break;
 +                case 4:
 +                    format = FMT_32_32_32_32_FLOAT; break;
 +                default:
 +                    break;
 +            }
 +                      break;
 +        case GL_DOUBLE:
 +                      uiElemSize = 8;
 +            switch(nChannels)
 +            {
 +                case 1:
 +                    format = FMT_32_FLOAT; break;
 +                case 2:
 +                    format = FMT_32_32_FLOAT; break;
 +                case 3:
 +                    format = FMT_32_32_32_FLOAT; break;
 +                case 4:
 +                    format = FMT_32_32_32_32_FLOAT; break;
 +                default:
 +                    break;
 +            }
 +            break;
 +        default:
 +                      ;
 +            //GL_ASSERT_NO_CASE();
 +    }
 +
 +    if(NULL != pClient_size)
 +    {
 +          *pClient_size = uiElemSize * nChannels;
 +    }
 +
 +    return(format);
 +}
 +
 +unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
 +{
 +    if(pAsm->D.dst.op3)
 +    {
 +        return 3;
 +    }
 +
 +    switch (pAsm->D.dst.opcode)
 +    {
 +    case SQ_OP2_INST_ADD:                          
 +    case SQ_OP2_INST_MUL: 
 +    case SQ_OP2_INST_MAX:
 +    case SQ_OP2_INST_MIN:
 +    //case SQ_OP2_INST_MAX_DX10:
 +    //case SQ_OP2_INST_MIN_DX10:
 +    case SQ_OP2_INST_SETGT:
 +    case SQ_OP2_INST_SETGE:
 +    case SQ_OP2_INST_PRED_SETE:
 +    case SQ_OP2_INST_PRED_SETGT:
 +    case SQ_OP2_INST_PRED_SETGE:
 +    case SQ_OP2_INST_PRED_SETNE:
 +    case SQ_OP2_INST_DOT4:
 +    case SQ_OP2_INST_DOT4_IEEE:
 +        return 2;  
 +
 +    case SQ_OP2_INST_MOV: 
 +    case SQ_OP2_INST_FRACT:
 +    case SQ_OP2_INST_FLOOR:
 +    case SQ_OP2_INST_KILLGT:
 +    case SQ_OP2_INST_EXP_IEEE:
 +    case SQ_OP2_INST_LOG_CLAMPED:
 +    case SQ_OP2_INST_LOG_IEEE:
 +    case SQ_OP2_INST_RECIP_IEEE:
 +    case SQ_OP2_INST_RECIPSQRT_IEEE:
 +    case SQ_OP2_INST_FLT_TO_INT:
 +    case SQ_OP2_INST_SIN:
 +    case SQ_OP2_INST_COS:
 +        return 1;
 +        
 +    default: r700_error(TODO_ASM_NEEDIMPINST, 
 +                        "Need instruction operand number. \n");;
 +    };
 +
 +    return 3;
 +}
 +
 +int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
 +{
 +    GLuint i;
 +
 +    Init_R700_Shader(pShader);
 +    pAsm->pR700Shader = pShader;
 +    pAsm->currentShaderType = spt;
 +
 +      pAsm->cf_last_export_ptr   = NULL;
 +
 +      pAsm->cf_current_export_clause_ptr = NULL;
 +      pAsm->cf_current_alu_clause_ptr    = NULL;
 +      pAsm->cf_current_tex_clause_ptr    = NULL;
 +      pAsm->cf_current_vtx_clause_ptr    = NULL;
 +      pAsm->cf_current_cf_clause_ptr     = NULL;
 +
 +      // No clause has been created yet
 +      pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
 +
 +      pAsm->number_of_colorandz_exports = 0;
 +      pAsm->number_of_exports           = 0;
 +      pAsm->number_of_export_opcodes    = 0;
 +
 +
 +      pAsm->D.bits = 0;
 +      pAsm->S[0].bits = 0;
 +      pAsm->S[1].bits = 0;
 +      pAsm->S[2].bits = 0;
 +
 +      pAsm->uLastPosUpdate = 0; 
 +      
 +      *(BITS *) &pAsm->fp_stOutFmt0 = 0;
 +
 +      pAsm->uIIns = 0;
 +      pAsm->uOIns = 0;
 +      pAsm->number_used_registers = 0;
 +      pAsm->uUsedConsts = 256; 
 +
 +
 +      // Fragment programs
 +      pAsm->uBoolConsts = 0;
 +      pAsm->uIntConsts = 0;
 +      pAsm->uInsts = 0;
 +      pAsm->uConsts = 0;
 +
 +      pAsm->FCSP = 0;
 +      pAsm->fc_stack[0].type = FC_NONE;
 +
 +      pAsm->branch_depth     = 0;
 +      pAsm->max_branch_depth = 0;
 +
 +      pAsm->aArgSubst[0] =
 +      pAsm->aArgSubst[1] =
 +      pAsm->aArgSubst[2] =
 +      pAsm->aArgSubst[3] = (-1);
 +
 +      pAsm->uOutputs = 0;
 +
 +      for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++) 
 +      {
 +              pAsm->color_export_register_number[i] = (-1);
 +      }
 +
 +
 +      pAsm->depth_export_register_number = (-1);
 +      pAsm->stencil_export_register_number = (-1);
 +      pAsm->coverage_to_mask_export_register_number = (-1);
 +      pAsm->mask_export_register_number = (-1);
 +
 +      pAsm->starting_export_register_number = 0;
 +      pAsm->starting_vfetch_register_number = 0;
 +      pAsm->starting_temp_register_number   = 0;
 +      pAsm->uFirstHelpReg = 0;
 +
 +
 +      pAsm->input_position_is_used = GL_FALSE;
 +      pAsm->input_normal_is_used   = GL_FALSE;
 +
 +
 +      for (i=0; i<NUMBER_OF_INPUT_COLORS; i++) 
 +      {
 +              pAsm->input_color_is_used[ i ] = GL_FALSE;
 +      }
 +
 +      for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++) 
 +      {
 +              pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
 +      }
 +
 +      for (i=0; i<VERT_ATTRIB_MAX; i++) 
 +      {
 +              pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
 +      }
 +
 +      pAsm->number_of_inputs = 0;
 +
 +      return 0;
 +}
 +
 +GLboolean IsTex(gl_inst_opcode Opcode)
 +{
 +    if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
 +    {
 +        return GL_TRUE;
 +    }
 +    return GL_FALSE;
 +}
 +
 +GLboolean IsAlu(gl_inst_opcode Opcode)
 +{
 +    //TODO : more for fc and ex for higher spec.
 +    if( IsTex(Opcode) )
 +    {
 +        return GL_FALSE;
 +    }
 +    return GL_TRUE;
 +}
 +
 +int check_current_clause(r700_AssemblerBase* pAsm,
 +                                           CF_CLAUSE_TYPE      new_clause_type)
 +{
 +      if (pAsm->cf_current_clause_type != new_clause_type) 
 +      {       //Close last open clause
 +              switch (pAsm->cf_current_clause_type) 
 +              {
 +              case CF_ALU_CLAUSE:
 +                      if ( pAsm->cf_current_alu_clause_ptr != NULL) 
 +            {
 +                pAsm->cf_current_alu_clause_ptr = NULL;
 +            }
 +                      break;
 +              case CF_VTX_CLAUSE:
 +                      if ( pAsm->cf_current_vtx_clause_ptr != NULL) 
 +            {
 +                pAsm->cf_current_vtx_clause_ptr = NULL;
 +            }
 +                      break;
 +              case CF_TEX_CLAUSE:
 +                      if ( pAsm->cf_current_tex_clause_ptr != NULL) 
 +            {
 +                pAsm->cf_current_tex_clause_ptr = NULL;
 +            }
 +                      break;
 +              case CF_EXPORT_CLAUSE:
 +                      if ( pAsm->cf_current_export_clause_ptr != NULL) 
 +            {
 +                pAsm->cf_current_export_clause_ptr = NULL;
 +            }
 +                      break;
 +              case CF_OTHER_CLAUSE:
 +                      if ( pAsm->cf_current_cf_clause_ptr != NULL) 
 +            {
 +                pAsm->cf_current_cf_clause_ptr = NULL;
 +            }
 +                      break;
 +              case CF_EMPTY_CLAUSE:
 +                      break;
 +              default:
 +            r700_error(ERROR_ASM_VTX_CLAUSE,
 +                       "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
 +                      return GL_FALSE;
 +              }
 +
 +        pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
 +
 +              // Create new clause
 +        switch (new_clause_type) 
 +          {
 +        case CF_ALU_CLAUSE:
 +            pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
 +            break;
 +        case CF_VTX_CLAUSE:
 +            pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
 +            break;
 +        case CF_TEX_CLAUSE:        
 +            pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
 +            break;
 +        case CF_EXPORT_CLAUSE:
 +            {
 +                R700ControlFlowSXClause* pR700ControlFlowSXClause 
 +                            = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause); 
 +            
 +                // Add new export instruction to control flow program        
 +                if (pR700ControlFlowSXClause != 0) 
 +                {
 +                    pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
 +                    Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
 +                    AddCFInstruction( pAsm->pR700Shader, 
 +                                      (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
 +                }
 +                else 
 +                {
 +                    r700_error(ERROR_ASM_ALLOCEXPORTCF,
 +                               "Error allocating new EXPORT CF instruction in check_current_clause. \n");
 +                    return GL_FALSE;
 +                }
 +                pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
 +            }
 +            break;
 +        case CF_EMPTY_CLAUSE:
 +            break;
 +        case CF_OTHER_CLAUSE:
 +            pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
 +            break;
 +        default:
 +            r700_error(ERROR_ASM_UNKOWNCLAUSE,
 +                       "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
 +            return GL_FALSE;
 +        }
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean add_vfetch_instruction(r700_AssemblerBase*     pAsm,
 +                                                               R700VertexInstruction*  vertex_instruction_ptr)
 +{
 +      if( GL_FALSE == check_current_clause(pAsm,  CF_VTX_CLAUSE) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +    if( pAsm->cf_current_vtx_clause_ptr == NULL ||
 +        ( (pAsm->cf_current_vtx_clause_ptr != NULL) && 
 +         (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1) 
 +        ) ) 
 +    { 
 +              // Create new Vfetch control flow instruction for this new clause
 +              pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
 +
 +              if (pAsm->cf_current_vtx_clause_ptr != NULL) 
 +              {
 +                      Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
 +                      AddCFInstruction( pAsm->pR700Shader, 
 +                              (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
 +              }
 +              else 
 +              {
 +            r700_error(ERROR_ASM_ALLOCVTXCF, "Could not allocate a new VFetch CF instruction.");
 +                      return GL_FALSE;
 +              }
 +
 +              pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count        = 0x0;
 +              pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const         = 0x0;
 +              pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
 +              pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count            = 0x0;
 +              pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program   = 0x0;
 +              pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
 +              pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_VTX;
 +              pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
 +              pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier          = 0x1;
 +
 +              LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
 +      }
 +      else
 +      {
 +              pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
 +      }
 +
 +      AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
 +
 +      return GL_TRUE;
 +}
 +
 +GLboolean add_tex_instruction(r700_AssemblerBase*     pAsm,
 +                              R700TextureInstruction* tex_instruction_ptr)
 +{ 
 +    if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if ( pAsm->cf_current_tex_clause_ptr == NULL ||
 +         ( (pAsm->cf_current_tex_clause_ptr != NULL) && 
 +           (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1) 
 +         ) ) 
 +    {
 +        // new tex cf instruction for this new clause  
 +        pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
 +
 +              if (pAsm->cf_current_tex_clause_ptr != NULL) 
 +              {
 +                      Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
 +                      AddCFInstruction( pAsm->pR700Shader, 
 +                              (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
 +              }
 +              else 
 +              {
 +            r700_error(ERROR_ASM_ALLOCTEXCF, "Could not allocate a new TEX CF instruction.");
 +                      return GL_FALSE;
 +              }
 +        
 +        pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count        = 0x0;
 +        pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const         = 0x0;
 +        pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
 +
 +        pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program   = 0x0;
 +        pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
 +        pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_TEX;
 +        pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
 +        pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier          = 0x0;   //0x1;
 +    }
 +    else 
 +    {        
 +        pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
 +    }
 +
 +    // If this clause constains any TEX instruction that is dependent on a previous instruction, 
 +    // set the barrier bit
 +    if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
 +    {
 +        pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;  
 +    }
 +
 +    if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
 +    {
 +        pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
 +        tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
 +    }
 +
 +    AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
 +                                                              GLuint gl_client_id,
 +                                GLuint destination_register,
 +                                                              GLuint number_of_elements,
 +                                GLenum dataElementType,
 +                                                              VTX_FETCH_METHOD* pFetchMethod)
 +{
 +    GLuint client_size_inbyte;
 +      GLuint data_format;
 +    GLuint mega_fetch_count;
 +      GLuint is_mega_fetch_flag;
 +
 +      R700VertexGenericFetch*   vfetch_instruction_ptr;
 +      R700VertexGenericFetch*   assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
 +
 +      if (assembled_vfetch_instruction_ptr == NULL) 
 +      {
 +              vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
 +              if (vfetch_instruction_ptr == NULL) 
 +              {
 +                      return GL_FALSE;
 +              }
 +        Init_R700VertexGenericFetch(vfetch_instruction_ptr);
 +    }
 +      else 
 +      {
 +              vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
 +      }
 +
 +      data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
 +
 +      if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
 +      {
 +              //TODO : mini fetch
 +      }
 +      else
 +      {
 +              mega_fetch_count = MEGA_FETCH_BYTES - 1;
 +              is_mega_fetch_flag       = 0x1;
 +              pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
 +      }
 +
 +      vfetch_instruction_ptr->m_Word0.f.vtx_inst         = SQ_VTX_INST_FETCH;
 +      vfetch_instruction_ptr->m_Word0.f.fetch_type       = SQ_VTX_FETCH_VERTEX_DATA;
 +      vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
 +
 +      vfetch_instruction_ptr->m_Word0.f.buffer_id        = gl_client_id;
 +      vfetch_instruction_ptr->m_Word0.f.src_gpr          = 0x0; 
 +      vfetch_instruction_ptr->m_Word0.f.src_rel          = SQ_ABSOLUTE;
 +      vfetch_instruction_ptr->m_Word0.f.src_sel_x        = SQ_SEL_X;
 +      vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
 +
 +      vfetch_instruction_ptr->m_Word1.f.dst_sel_x        = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
 +      vfetch_instruction_ptr->m_Word1.f.dst_sel_y        = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
 +      vfetch_instruction_ptr->m_Word1.f.dst_sel_z        = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
 +      vfetch_instruction_ptr->m_Word1.f.dst_sel_w        = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
 +
 +      vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
 +
 +      // Destination register
 +      vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register; 
 +      vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
 +
 +      vfetch_instruction_ptr->m_Word2.f.offset              = 0;
 +      vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
 +
 +      vfetch_instruction_ptr->m_Word2.f.mega_fetch          = is_mega_fetch_flag;
 +
 +      if (assembled_vfetch_instruction_ptr == NULL) 
 +      {
 +              if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) ) 
 +        {   
 +                      return GL_FALSE;
 +              }
 +
 +              if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL) 
 +              {
 +                      return GL_FALSE;
 +              }
 +              else 
 +              {
 +                      pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
 +              }
 +      }
 +
 +      return GL_TRUE;
 +}
 +
 +GLuint gethelpr(r700_AssemblerBase* pAsm) 
 +{
 +    GLuint r = pAsm->uHelpReg;
 +    pAsm->uHelpReg++;
 +    if (pAsm->uHelpReg > pAsm->number_used_registers)
 +    {
 +        pAsm->number_used_registers = pAsm->uHelpReg;
 +      }
 +    return r;
 +}
 +void resethelpr(r700_AssemblerBase* pAsm) 
 +{
 +    pAsm->uHelpReg = pAsm->uFirstHelpReg;
 +}
 +
 +void checkop_init(r700_AssemblerBase* pAsm)
 +{
 +    resethelpr(pAsm);
 +    pAsm->aArgSubst[0] =
 +    pAsm->aArgSubst[1] =
 +    pAsm->aArgSubst[2] =
 +    pAsm->aArgSubst[3] = -1;
 +}
 +
 +GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
 +{
 +    GLuint tmp = gethelpr(pAsm);
 +
 +    //mov src to temp helper gpr.
 +    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
 +
 +    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +  
 +    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
 +    pAsm->D.dst.reg   = tmp;
 +
 +    nomask_PVSDST(&(pAsm->D.dst));
 +
 +    if( GL_FALSE == assemble_src(pAsm, src, 0) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    noswizzle_PVSSRC(&(pAsm->S[0].src));
 +    noneg_PVSSRC(&(pAsm->S[0].src));
 +   
 +    if( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    pAsm->aArgSubst[1 + src] = tmp;
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean checkop1(r700_AssemblerBase* pAsm)
 +{
 +    checkop_init(pAsm);
 +    return GL_TRUE;
 +}
 +
 +GLboolean checkop2(r700_AssemblerBase* pAsm)
 +{
 +    GLboolean bSrcConst[2];
 +    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
 +
 +    checkop_init(pAsm);
 +
 +    if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT)    ||
 +        (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
 +        (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM)   ||
 +        (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
 +    {
 +        bSrcConst[0] = GL_TRUE;
 +    }
 +    else
 +    {
 +        bSrcConst[0] = GL_FALSE;
 +    }
 +    if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT)    ||
 +        (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
 +        (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM)   ||
 +        (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
 +    {
 +        bSrcConst[1] = GL_TRUE;
 +    }
 +    else
 +    {
 +        bSrcConst[1] = GL_FALSE;
 +    }
 +
 +    if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
 +    {
 +        if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
 +        {
 +            if( GL_FALSE == mov_temp(pAsm, 1) )
 +            {
 +                return GL_FALSE;
 +            }
 +        }
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean checkop3(r700_AssemblerBase* pAsm)
 +{
 +    GLboolean bSrcConst[3];
 +    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
 +
 +    checkop_init(pAsm);
 +
 +    if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT)    ||
 +        (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
 +        (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM)   ||
 +        (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
 +    {
 +        bSrcConst[0] = GL_TRUE;
 +    }
 +    else
 +    {
 +        bSrcConst[0] = GL_FALSE;
 +    }
 +    if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT)    ||
 +        (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
 +        (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM)   ||
 +        (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
 +    {
 +        bSrcConst[1] = GL_TRUE;
 +    }
 +    else
 +    {
 +        bSrcConst[1] = GL_FALSE;
 +    }
 +    if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT)    ||
 +        (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
 +        (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM)   ||
 +        (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
 +    {
 +        bSrcConst[2] = GL_TRUE;
 +    }
 +    else
 +    {
 +        bSrcConst[2] = GL_FALSE;
 +    }
 +
 +    if( (GL_TRUE == bSrcConst[0]) && 
 +        (GL_TRUE == bSrcConst[1]) && 
 +        (GL_TRUE == bSrcConst[2]) ) 
 +    {
 +        if( GL_FALSE == mov_temp(pAsm, 1) )
 +        {
 +            return GL_FALSE;
 +        }
 +        if( GL_FALSE == mov_temp(pAsm, 2) )
 +        {
 +            return GL_FALSE;
 +        }
 +
 +        return GL_TRUE;
 +    }
 +    else if( (GL_TRUE == bSrcConst[0]) && 
 +             (GL_TRUE == bSrcConst[1]) ) 
 +    {
 +        if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)    
 +          {
 +            if( GL_FALSE == mov_temp(pAsm, 1) )
 +            {
 +                return 1;
 +            }
 +        }
 +
 +        return GL_TRUE;
 +    }
 +    else if ( (GL_TRUE == bSrcConst[0]) && 
 +              (GL_TRUE == bSrcConst[2]) )  
 +    {
 +        if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)     
 +          {
 +            if( GL_FALSE == mov_temp(pAsm, 2) )
 +            {
 +                return GL_FALSE;
 +            }
 +        }
 +
 +        return GL_TRUE;
 +    }
 +    else if( (GL_TRUE == bSrcConst[1]) && 
 +             (GL_TRUE == bSrcConst[2]) ) 
 +    {
 +        if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
 +          {
 +            if( GL_FALSE == mov_temp(pAsm, 2) )
 +            {
 +                return GL_FALSE;
 +            }
 +        }
 +
 +        return GL_TRUE;
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_src(r700_AssemblerBase *pAsm,
 +                       int src, 
 +                       int fld)
 +{
 +    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
 +
 +    if (fld == -1)
 +    {
 +        fld = src;
 +    }
 +
 +    if(pAsm->aArgSubst[1+src] >= 0) 
 +    {
 +        setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
 +        pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
 +        pAsm->S[fld].src.reg   = pAsm->aArgSubst[1+src];
 +    }
 +    else 
 +    {
 +        switch (pILInst->SrcReg[src].File)
 +        {
 +        case PROGRAM_TEMPORARY:
 +            setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
 +            pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
 +            pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
 +            break;
 +        case PROGRAM_CONSTANT:
 +        case PROGRAM_LOCAL_PARAM:
 +        case PROGRAM_ENV_PARAM:
 +        case PROGRAM_STATE_VAR:
 +            if (1 == pILInst->SrcReg[src].RelAddr)
 +            {
 +                setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
 +            }
 +            else
 +            {
 +                setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);              
 +            }
 +
 +            pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
 +            pAsm->S[fld].src.reg   = pILInst->SrcReg[src].Index;
 +            break;      
 +        case PROGRAM_INPUT:
 +            setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
 +            pAsm->S[fld].src.rtype = SRC_REG_INPUT;
 +            switch (pAsm->currentShaderType)
 +            {
 +            case SPT_FP:
 +                pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
 +                break;
 +            case SPT_VP:
 +                pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
 +                break;
 +            }
 +            break;      
 +        default:
 +            r700_error(ERROR_ASM_SRCARGUMENT, "Invalid source argument type");          
 +            return GL_FALSE;
 +        }
 +    } 
 +
 +    pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
 +    pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
 +    pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
 +    pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
 +
-     pAsm->S[0].src.negx = pILInst->SrcReg[0].NegateBase & 0x1;
-     pAsm->S[0].src.negy = (pILInst->SrcReg[0].NegateBase >> 1) & 0x1;
-     pAsm->S[0].src.negz = (pILInst->SrcReg[0].NegateBase >> 2) & 0x1;
-     pAsm->S[0].src.negw = (pILInst->SrcReg[0].NegateBase >> 3) & 0x1;
++    pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
++    pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
++    pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
++    pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
 +     
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_dst(r700_AssemblerBase *pAsm)
 +{
 +    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
 +    switch (pILInst->DstReg.File) 
 +    {
 +    case PROGRAM_TEMPORARY:
 +        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
 +        pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
 +        break;
 +    case PROGRAM_ADDRESS:
 +        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +        pAsm->D.dst.rtype = DST_REG_A0;
 +        pAsm->D.dst.reg = 0;
 +        break;
 +    case PROGRAM_OUTPUT:
 +        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +        pAsm->D.dst.rtype = DST_REG_OUT;
 +        switch (pAsm->currentShaderType)
 +        {
 +        case SPT_FP:
 +            pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
 +            break;
 +        case SPT_VP:
 +            pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
 +            break;
 +        }
 +        break;   
 +    default:
 +        r700_error(ERROR_ASM_DSTARGUMENT, "Invalid destination output argument type");
 +        return GL_FALSE;
 +    }
 +
 +    pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
 +    pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
 +    pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
 +    pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
 +  
 +    return GL_TRUE;
 +}
 +
 +GLboolean tex_dst(r700_AssemblerBase *pAsm)
 +{
 +    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
 +
 +    if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
 +    {
 +        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
 +        pAsm->D.dst.reg   = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
 +
 +        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +    }
 +    else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
 +    {
 +        pAsm->D.dst.rtype = DST_REG_OUT;
 +        switch (pAsm->currentShaderType)
 +        {
 +        case SPT_FP:
 +            pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
 +            break;
 +        case SPT_VP:
 +            pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
 +            break;
 +        }
 +
 +        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +    }
 +    else 
 +    {
 +        r700_error(ERROR_ASM_DSTARGUMENT, "Invalid destination output argument type");
 +        return GL_FALSE;
 +    }
 +
 +    pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
 +    pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
 +    pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
 +    pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
 +  
 +    return GL_TRUE;
 +}
 +
 +GLboolean tex_src(r700_AssemblerBase *pAsm)
 +{
 +    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
 + 
 +    GLboolean bValidTexCoord = GL_FALSE;
 +
 +    switch (pILInst->SrcReg[0].File)
 +    {
 +    case PROGRAM_TEMPORARY:
 +        bValidTexCoord = GL_TRUE;
 +
 +        pAsm->S[0].src.reg   = pILInst->SrcReg[0].Index + pAsm->starting_temp_register_number;
 +        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
 +
 +        break;
 +    case PROGRAM_INPUT:
 +        switch (pILInst->SrcReg[0].Index)
 +        {
 +        case FRAG_ATTRIB_COL0:
 +        case FRAG_ATTRIB_COL1:
 +        case FRAG_ATTRIB_TEX0:
 +        case FRAG_ATTRIB_TEX1:
 +        case FRAG_ATTRIB_TEX2:
 +        case FRAG_ATTRIB_TEX3:
 +        case FRAG_ATTRIB_TEX4:
 +        case FRAG_ATTRIB_TEX5:
 +        case FRAG_ATTRIB_TEX6:
 +        case FRAG_ATTRIB_TEX7:
 +            bValidTexCoord = GL_TRUE;
 +
 +            pAsm->S[0].src.reg   = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
 +            pAsm->S[0].src.rtype = SRC_REG_INPUT;
 +        }
 +        break;
 +    }
 +
 +    if(GL_TRUE == bValidTexCoord)
 +    { 
 +        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +    }
 +    else
 +    {
 +        r700_error(ERROR_ASM_BADTEXSRC, "Invalid source texcoord for TEX instruction");
 +        return GL_FALSE;
 +    }
 +
 +    pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
 +    pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
 +    pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
 +    pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
 +
++    pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
++    pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
++    pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
++    pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
 +     
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm)
 +{
 +    PVSSRC *   texture_coordinate_source;
 +    PVSSRC *   texture_unit_source;
 +    
 +    R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
 +      if (tex_instruction_ptr == NULL) 
 +      {
 +              return GL_FALSE;
 +      }
 +    Init_R700TextureInstruction(tex_instruction_ptr);
 +
 +    texture_coordinate_source = &(pAsm->S[0].src);
 +    texture_unit_source       = &(pAsm->S[1].src);
 +
 +    tex_instruction_ptr->m_Word0.f.tex_inst         = pAsm->D.dst.opcode;
 +    tex_instruction_ptr->m_Word0.f.bc_frac_mode     = 0x0;
 +    tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
 +
 +    tex_instruction_ptr->m_Word0.f.resource_id      = texture_unit_source->reg;
 +
 +    tex_instruction_ptr->m_Word1.f.lod_bias     = 0x0;
 +    tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
 +    tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
 +    tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
 +    tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
 +
 +    tex_instruction_ptr->m_Word2.f.offset_x   = 0x0;
 +    tex_instruction_ptr->m_Word2.f.offset_y   = 0x0;
 +    tex_instruction_ptr->m_Word2.f.offset_z   = 0x0;
 +
 +    tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
 +
 +    // dst
 +    if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || 
 +         (pAsm->D.dst.rtype == DST_REG_OUT) ) 
 +    {
 +        tex_instruction_ptr->m_Word0.f.src_gpr    = texture_coordinate_source->reg;
 +        tex_instruction_ptr->m_Word0.f.src_rel    = SQ_ABSOLUTE;
 +
 +        tex_instruction_ptr->m_Word1.f.dst_gpr    = pAsm->D.dst.reg;
 +        tex_instruction_ptr->m_Word1.f.dst_rel    = SQ_ABSOLUTE;
 +
 +        tex_instruction_ptr->m_Word1.f.dst_sel_x  = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
 +        tex_instruction_ptr->m_Word1.f.dst_sel_y  = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
 +        tex_instruction_ptr->m_Word1.f.dst_sel_z  = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
 +        tex_instruction_ptr->m_Word1.f.dst_sel_w  = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
 +
 +
 +        tex_instruction_ptr->m_Word2.f.src_sel_x  = texture_coordinate_source->swizzlex;
 +        tex_instruction_ptr->m_Word2.f.src_sel_y  = texture_coordinate_source->swizzley;
 +        tex_instruction_ptr->m_Word2.f.src_sel_z  = texture_coordinate_source->swizzlez;
 +        tex_instruction_ptr->m_Word2.f.src_sel_w  = texture_coordinate_source->swizzlew;
 +    }
 +    else 
 +    {
 +        r700_error(ERROR_ASM_TEXDSTBADTYPE, "Only temp destination registers supported for TEX dest regs.");
 +        return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +void initialize(r700_AssemblerBase *pAsm)
 +{
 +    GLuint cycle, component;
 +
 +    for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++) 
 +    {
 +        for (component=0; component<NUMBER_OF_COMPONENTS; component++) 
 +        {
 +            pAsm->hw_gpr[cycle][component] = (-1);
 +        }
 +    }
 +    for (component=0; component<NUMBER_OF_COMPONENTS; component++) 
 +    {
 +        pAsm->hw_cfile_addr[component] = (-1);
 +        pAsm->hw_cfile_chan[component] = (-1);
 +    }
 +}
 +
 +GLboolean assemble_alu_src(R700ALUInstruction*  alu_instruction_ptr,
 +                           int                  source_index,
 +                           PVSSRC*              pSource,
 +                           BITS                 scalar_channel_index)
 +{
 +    BITS src_sel;
 +    BITS src_rel;
 +    BITS src_chan;
 +    BITS src_neg;
 +
 +    //--------------------------------------------------------------------------
 +    // Source for operands src0, src1. 
 +    // Values [0,127] correspond to GPR[0..127]. 
 +    // Values [256,511] correspond to cfile constants c[0..255]. 
 +
 +    //--------------------------------------------------------------------------
 +    // Other special values are shown in the list below.
 +
 +    // 248    SQ_ALU_SRC_0: special constant 0.0.
 +    // 249    SQ_ALU_SRC_1: special constant 1.0 float.
 +
 +    // 250    SQ_ALU_SRC_1_INT: special constant 1 integer.
 +    // 251    SQ_ALU_SRC_M_1_INT: special constant -1 integer.
 +
 +    // 252    SQ_ALU_SRC_0_5: special constant 0.5 float.
 +    // 253    SQ_ALU_SRC_LITERAL: literal constant.
 +
 +    // 254    SQ_ALU_SRC_PV: previous vector result.
 +    // 255    SQ_ALU_SRC_PS: previous scalar result.
 +    //--------------------------------------------------------------------------
 +
 +    BITS channel_swizzle;
 +    switch (scalar_channel_index) 
 +    {
 +        case 0: channel_swizzle = pSource->swizzlex; break;
 +        case 1: channel_swizzle = pSource->swizzley; break;
 +        case 2: channel_swizzle = pSource->swizzlez; break;
 +        case 3: channel_swizzle = pSource->swizzlew; break;
 +        default: channel_swizzle = SQ_SEL_MASK; break;
 +    }
 +
 +    if(channel_swizzle == SQ_SEL_0) 
 +    {
 +        src_sel = SQ_ALU_SRC_0; 
 +    }
 +    else if (channel_swizzle == SQ_SEL_1) 
 +    {
 +        src_sel = SQ_ALU_SRC_1; 
 +    }
 +    else 
 +    {
 +        if ( (pSource->rtype == SRC_REG_TEMPORARY) || 
 +             (pSource->rtype == SRC_REG_INPUT)
 +        ) 
 +        {
 +            src_sel = pSource->reg;
 +        }
 +        else if (pSource->rtype == SRC_REG_CONSTANT)
 +        {
 +            src_sel = pSource->reg + CFILE_REGISTER_OFFSET;            
 +        }
 +        else
 +        {
 +            r700_error(ERROR_ASM_ALUSRCBADTYPE, "Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.", 
 +                     source_index, pSource->rtype);
 +            return GL_FALSE;
 +        }
 +    }
 +
 +    if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) ) 
 +    {
 +        src_rel = SQ_ABSOLUTE;
 +    }
 +    else 
 +    {
 +        src_rel = SQ_RELATIVE;
 +    }
 +
 +    switch (channel_swizzle) 
 +    {
 +        case SQ_SEL_X: 
 +            src_chan = SQ_CHAN_X; 
 +            break;
 +        case SQ_SEL_Y: 
 +            src_chan = SQ_CHAN_Y; 
 +            break;
 +        case SQ_SEL_Z: 
 +            src_chan = SQ_CHAN_Z; 
 +            break;
 +        case SQ_SEL_W: 
 +            src_chan = SQ_CHAN_W; 
 +            break;
 +        case SQ_SEL_0:
 +        case SQ_SEL_1:
 +            // Does not matter since src_sel controls
 +            src_chan = SQ_CHAN_X; 
 +            break;
 +        default:
 +            r700_error(ERROR_ASM_ALUSRCSELECT, "Unknown source select value (%d) in assemble_alu_src().");
 +            return GL_FALSE;
 +            break;
 +    }
 +
 +    switch (scalar_channel_index) 
 +    {
 +        case 0: src_neg = pSource->negx; break;
 +        case 1: src_neg = pSource->negy; break;
 +        case 2: src_neg = pSource->negz; break;
 +        case 3: src_neg = pSource->negw; break;
 +        default: src_neg = 0; break;
 +    }
 +
 +    switch (source_index) 
 +    {
 +        case 0:
 +            alu_instruction_ptr->m_Word0.f.src0_sel  = src_sel;
 +            alu_instruction_ptr->m_Word0.f.src0_rel  = src_rel;
 +            alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
 +            alu_instruction_ptr->m_Word0.f.src0_neg  = src_neg;
 +            break;
 +        case 1:
 +            alu_instruction_ptr->m_Word0.f.src1_sel  = src_sel;
 +            alu_instruction_ptr->m_Word0.f.src1_rel  = src_rel;
 +            alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
 +            alu_instruction_ptr->m_Word0.f.src1_neg  = src_neg;
 +            break;
 +        case 2:
 +            alu_instruction_ptr->m_Word1_OP3.f.src2_sel  = src_sel;
 +            alu_instruction_ptr->m_Word1_OP3.f.src2_rel  = src_rel;
 +            alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
 +            alu_instruction_ptr->m_Word1_OP3.f.src2_neg  = src_neg;
 +            break;
 +        default:
 +            r700_error(ERROR_ASM_ALUSRCNUMBER, "Only three sources allowed in ALU opcodes.");
 +          return GL_FALSE;
 +          break;
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
 +                              R700ALUInstruction* alu_instruction_ptr,
 +                              GLuint              contiguous_slots_needed)
 +{
 +    if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if ( pAsm->cf_current_alu_clause_ptr == NULL ||
 +         ( (pAsm->cf_current_alu_clause_ptr != NULL) && 
 +           (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
 +         ) ) 
 +    {
 +
 +        //new cf inst for this clause
 +        pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
 +            
 +        // link the new cf to cf segment    
 +        if(NULL != pAsm->cf_current_alu_clause_ptr) 
 +        {
 +            Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
 +                      AddCFInstruction( pAsm->pR700Shader, 
 +                              (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );            
 +        }
 +        else 
 +        {
 +            r700_error(ERROR_ASM_ALLOCALUCF, "Could not allocate a new ALU CF instruction.");
 +            return GL_FALSE;
 +        }
 +
 +        pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
 +        pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
 +        pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
 +
 +        pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
 +        pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
 +        pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
 +
 +        //cf_current_alu_clause_ptr->m_Word1.f.count           = number_of_scalar_operations - 1;
 +        pAsm->cf_current_alu_clause_ptr->m_Word1.f.count           = 0x0;
 +        pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst         = SQ_CF_INST_ALU;
 +
 +        pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
 +
 +        pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier         = 0x1;
 +    }
 +    else 
 +    {
 +        pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++;
 +    }
 +
 +    // If this clause constains any instruction that is forward dependent on a TEX instruction, 
 +    // set the whole_quad_mode for this clause
 +    if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) ) 
 +    {
 +        pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;   
 +    }
 +
 +    if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) ) 
 +    {
 +        alu_instruction_ptr->m_Word0.f.last = 1;
 +    }
 +
 +    if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
 +    {
 +        pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
 +        alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
 +    }
 +    
 +    AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
 +
 +    return GL_TRUE;
 +}
 +
 +void get_src_properties(R700ALUInstruction*  alu_instruction_ptr,
 +                        int                  source_index,
 +                        BITS*                psrc_sel,
 +                        BITS*                psrc_rel,
 +                        BITS*                psrc_chan,
 +                        BITS*                psrc_neg)
 +{
 +    switch (source_index) 
 +    {
 +        case 0:
 +            *psrc_sel  = alu_instruction_ptr->m_Word0.f.src0_sel ;
 +            *psrc_rel  = alu_instruction_ptr->m_Word0.f.src0_rel ;
 +            *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
 +            *psrc_neg  = alu_instruction_ptr->m_Word0.f.src0_neg ;
 +            break;
 +
 +        case 1:
 +            *psrc_sel  = alu_instruction_ptr->m_Word0.f.src1_sel ;
 +            *psrc_rel  = alu_instruction_ptr->m_Word0.f.src1_rel ;
 +            *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
 +            *psrc_neg  = alu_instruction_ptr->m_Word0.f.src1_neg ;
 +            break;
 +
 +        case 2:
 +            *psrc_sel  = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
 +            *psrc_rel  = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
 +            *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
 +            *psrc_neg  = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
 +            break;
 +    }
 +}
 +
 +int is_cfile(BITS sel) 
 +{
 +    if (sel > 255 && sel < 512) 
 +    {
 +        return 1;
 +    }
 +    return 0;
 +}
 +
 +int is_const(BITS sel) 
 +{
 +    if (is_cfile(sel)) 
 +    {
 +        return 1;
 +    }
 +    else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL) 
 +    {
 +        return 1;
 +    }
 +    return 0;
 +}
 +
 +int is_gpr(BITS sel) 
 +{
 +    if (sel >= 0 && sel < 128) 
 +    {
 +        return 1;
 +    }
 +    return 0;
 +}
 +
 +const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210,  //000
 +                                    SQ_ALU_VEC_120,  //001
 +                                    SQ_ALU_VEC_102,  //010
 +
 +                                    SQ_ALU_VEC_201,  //011
 +                                    SQ_ALU_VEC_012,  //100
 +                                    SQ_ALU_VEC_021,  //101
 +
 +                                    SQ_ALU_VEC_012,  //110
 +                                    SQ_ALU_VEC_012}; //111
 +
 +const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210,  //000
 +                                    SQ_ALU_SCL_122,  //001 
 +                                    SQ_ALU_SCL_122,  //010
 +
 +                                    SQ_ALU_SCL_221,  //011
 +                                    SQ_ALU_SCL_212,  //100
 +                                    SQ_ALU_SCL_122,  //101
 +
 +                                    SQ_ALU_SCL_122,  //110
 +                                    SQ_ALU_SCL_122}; //111
 +
 +GLboolean reserve_cfile(r700_AssemblerBase* pAsm, 
 +                        GLuint sel, 
 +                        GLuint chan)
 +{
 +    int res_match = (-1);
 +    int res_empty = (-1);
 +
 +    GLint res;
 +
 +    for (res=3; res>=0; res--) 
 +    {
 +        if(pAsm->hw_cfile_addr[ res] < 0)  
 +        {
 +            res_empty = res;
 +        }
 +        else if( (pAsm->hw_cfile_addr[res] == (int)sel)
 +                 &&
 +                 (pAsm->hw_cfile_chan[ res ] == (int) chan) ) 
 +        {
 +            res_match = res;
 +        }
 +    }
 +
 +    if(res_match >= 0) 
 +    {
 +        // Read for this scalar component already reserved, nothing to do here.
 +        ;
 +    }
 +    else if(res_empty >= 0) 
 +    {
 +        pAsm->hw_cfile_addr[ res_empty ] = sel;
 +        pAsm->hw_cfile_chan[ res_empty ] = chan;
 +    }
 +    else 
 +    {
 +        r700_error(ERROR_ASM_CONSTCHANNEL, "All cfile read ports are used, cannot reference C$sel, channel $chan.");
 +        return GL_FALSE;
 +    }
 +    return GL_TRUE;
 +}
 +
 +GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
 +{
 +    if(pAsm->hw_gpr[cycle][chan] < 0) 
 +    {
 +        pAsm->hw_gpr[cycle][chan] = sel;
 +    }
 +    else if(pAsm->hw_gpr[cycle][chan] != (int)sel) 
 +    {
 +        r700_error(ERROR_ASM_BADGPRRESERVE, "Another scalar operation has already used GPR read port for given channel");
 +        return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
 +{
 +    switch (swiz) 
 +    {
 +        case SQ_ALU_SCL_210:
 +            {
 +                int table[3] = {2,    1,      0};
 +                *pCycle = table[sel];
 +                return GL_TRUE;
 +            }
 +            break;
 +        case SQ_ALU_SCL_122:
 +            {
 +                int table[3] = {1,    2,      2};
 +                *pCycle = table[sel];
 +                return GL_TRUE;
 +            }
 +            break;
 +        case SQ_ALU_SCL_212:
 +            { 
 +                int table[3] = {2,    1,      2};
 +                *pCycle = table[sel];
 +                return GL_TRUE;
 +            }
 +            break;
 +        case SQ_ALU_SCL_221:
 +            {
 +                int table[3] = {2, 2, 1};
 +                *pCycle = table[sel];
 +                return GL_TRUE;
 +            }
 +            break;
 +        default:
 +            r700_error(ERROR_ASM_BADSCALARBZ, "Bad Scalar bank swizzle value");
 +            break;
 +    }
 +
 +    return GL_FALSE;
 +}
 +
 +GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
 +{
 +    switch (swiz) 
 +    {
 +        case SQ_ALU_VEC_012:
 +            {
 +                int table[3] = {0, 1, 2};
 +                *pCycle = table[sel];
 +            }
 +            break;
 +        case SQ_ALU_VEC_021:
 +            {
 +                int table[3] = {0, 2, 1};
 +                *pCycle = table[sel];
 +            }
 +            break;        
 +        case SQ_ALU_VEC_120:
 +            {
 +                int table[3] = {1, 2, 0};
 +                *pCycle = table[sel];
 +            }
 +            break;
 +        case SQ_ALU_VEC_102:
 +            {
 +                int table[3] = {1, 0, 2};
 +                *pCycle = table[sel];
 +            }
 +            break;
 +        case SQ_ALU_VEC_201:
 +            {
 +                int table[3] = {2, 0, 1};
 +                *pCycle = table[sel];
 +            }
 +            break;
 +        case SQ_ALU_VEC_210:
 +            {
 +                int table[3] = {2, 1, 0};
 +                *pCycle = table[sel];
 +            }
 +            break;
 +        default:
 +            r700_error(ERROR_ASM_BADVECTORBZ, "Bad Vec bank swizzle value");
 +            return GL_FALSE;
 +            break;
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean check_scalar(r700_AssemblerBase* pAsm,
 +                       R700ALUInstruction* alu_instruction_ptr)
 +{
 +    GLuint cycle;
 +    GLuint bank_swizzle;
 +    GLuint const_count = 0;
 +
 +    BITS sel;
 +    BITS chan;
 +    BITS rel;
 +    BITS neg;
 +
 +    GLuint src;
 +
 +    BITS src_sel [3] = {0,0,0};
 +    BITS src_chan[3] = {0,0,0};
 +    BITS src_rel [3] = {0,0,0};
 +    BITS src_neg [3] = {0,0,0};
 +
 +    GLuint swizzle_key;
 +
 +    GLuint number_of_operands = r700GetNumOperands(pAsm);
 +
 +    for (src=0; src<number_of_operands; src++) 
 +    {
 +        get_src_properties(alu_instruction_ptr,
 +                           src,
 +                           &(src_sel[src]), 
 +                           &(src_rel[src]), 
 +                           &(src_chan[src]), 
 +                           &(src_neg[src]) );
 +    }
 +
 +
 +    swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) + 
 +                    (is_const( src_sel[1] ) ? 2 : 0) + 
 +                    (is_const( src_sel[2] ) ? 1 : 0) );
 +  
 +    alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
 +
 +    for (src=0; src<number_of_operands; src++) 
 +    {
 +        sel  = src_sel [src];
 +        chan = src_chan[src];
 +        rel  = src_rel [src];
 +        neg  = src_neg [src];
 +
 +        if (is_const( sel )) 
 +        {
 +            // Any constant, including literal and inline constants
 +            const_count++;
 +
 +            if (is_cfile( sel )) 
 +            {
 +                reserve_cfile(pAsm, sel, chan);
 +            }
 +
 +        }
 +    }
 +
 +    for (src=0; src<number_of_operands; src++) 
 +    {
 +        sel  = src_sel [src];
 +        chan = src_chan[src];
 +        rel  = src_rel [src];
 +        neg  = src_neg [src];
 +
 +        if( is_gpr(sel) ) 
 +        {
 +            bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
 +
 +            if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
 +            {
 +                return GL_FALSE;
 +            }
 +
 +            if(cycle < const_count) 
 +            {
 +                if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
 +                {
 +                    return GL_FALSE;
 +                }
 +            }
 +        }
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean check_vector(r700_AssemblerBase* pAsm,
 +                       R700ALUInstruction* alu_instruction_ptr)
 +{
 +    GLuint cycle;
 +    GLuint bank_swizzle;
 +    GLuint const_count = 0;
 +
 +    GLuint src;
 +
 +    BITS sel;
 +    BITS chan;
 +    BITS rel;
 +    BITS neg;
 +
 +    BITS src_sel [3] = {0,0,0};
 +    BITS src_chan[3] = {0,0,0};
 +    BITS src_rel [3] = {0,0,0};
 +    BITS src_neg [3] = {0,0,0};
 +
 +    GLuint swizzle_key;
 +
 +    GLuint number_of_operands = r700GetNumOperands(pAsm);
 +
 +    for (src=0; src<number_of_operands; src++) 
 +    {
 +        get_src_properties(alu_instruction_ptr,
 +                           src,
 +                           &(src_sel[src]), 
 +                           &(src_rel[src]), 
 +                           &(src_chan[src]), 
 +                           &(src_neg[src]) );
 +    }
 +
 +
 +    swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) + 
 +                           (is_const( src_sel[1] ) ? 2 : 0) + 
 +                           (is_const( src_sel[2] ) ? 1 : 0) 
 +                         );
 +
 +    alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
 +
 +    for (src=0; src<number_of_operands; src++) 
 +    {
 +        sel  = src_sel [src];
 +        chan = src_chan[src];
 +        rel  = src_rel [src];
 +        neg  = src_neg [src];
 +
 +
 +        bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
 +
 +        if( is_gpr(sel) ) 
 +        {
 +            if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
 +            {
 +                return GL_FALSE;
 +            }
 +
 +            if ( (src  == 1)          && 
 +                 (sel  == src_sel[0]) &&
 +                 (chan == src_chan[0]) ) 
 +            {        
 +            }
 +            else 
 +            {
 +                if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
 +                {
 +                    return GL_FALSE;
 +                }
 +            }
 +        }
 +        else if( is_const(sel) ) 
 +        {                  
 +            const_count++;
 +
 +            if( is_cfile(sel) ) 
 +            {        
 +                if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
 +                {
 +                    return GL_FALSE;
 +                }
 +            }
 +        }
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
 +{
 +    GLuint    number_of_scalar_operations;
 +    GLboolean is_single_scalar_operation;
 +    GLuint    scalar_channel_index;
 +
 +    PVSSRC * pcurrent_source;
 +    int    current_source_index;
 +    GLuint contiguous_slots_needed;
 +
 +    GLuint    uNumSrc = r700GetNumOperands(pAsm);
 +    GLuint    channel_swizzle, j;
 +    GLuint    chan_counter[4] = {0, 0, 0, 0};
 +    PVSSRC *  pSource[3];
 +    GLboolean bSplitInst = GL_FALSE;
 +
 +    if (1 == pAsm->D.dst.math) 
 +    {
 +        is_single_scalar_operation = GL_TRUE;
 +        number_of_scalar_operations = 1;
 +    }
 +    else 
 +    {
 +        is_single_scalar_operation = GL_FALSE;
 +        number_of_scalar_operations = 4;
 +        
 +        /* check read port, only very preliminary algorithm, not count in 
 +           src0/1 same comp case and prev slot repeat case; also not count relative
 +           addressing. TODO: improve performance. */
 +        for(j=0; j<uNumSrc; j++)
 +        {
 +            pSource[j] = &(pAsm->S[j].src);
 +        }
 +        for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++) 
 +        {
 +            for(j=0; j<uNumSrc; j++) 
 +            {
 +                switch (scalar_channel_index) 
 +                {
 +                    case 0: channel_swizzle = pSource[j]->swizzlex; break;
 +                    case 1: channel_swizzle = pSource[j]->swizzley; break;
 +                    case 2: channel_swizzle = pSource[j]->swizzlez; break;
 +                    case 3: channel_swizzle = pSource[j]->swizzlew; break;
 +                    default: channel_swizzle = SQ_SEL_MASK; break;
 +                }
 +                if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) || 
 +                     (pSource[j]->rtype == SRC_REG_INPUT))
 +                     && (channel_swizzle <= SQ_SEL_W) )
 +                {                    
 +                    chan_counter[channel_swizzle]++;                        
 +                }
 +            }
 +        }
 +        if(   (chan_counter[SQ_SEL_X] > 3)
 +           || (chan_counter[SQ_SEL_Y] > 3)
 +           || (chan_counter[SQ_SEL_Z] > 3)
 +           || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
 +        {
 +            bSplitInst = GL_TRUE;
 +        }
 +    }
 +
 +    contiguous_slots_needed = 0;
 +
 +    if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) 
 +    {
 +        contiguous_slots_needed = 4;
 +    }
 +
 +    initialize(pAsm);    
 +
 +    for (scalar_channel_index=0;
 +            scalar_channel_index < number_of_scalar_operations; 
 +                scalar_channel_index++) 
 +    {
 +        R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
 +        if (alu_instruction_ptr == NULL) 
 +              {
 +                      return GL_FALSE;
 +              }
 +        Init_R700ALUInstruction(alu_instruction_ptr);
 +        
 +        //src 0
 +        current_source_index = 0;
 +        pcurrent_source = &(pAsm->S[0].src);
 +
 +        if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
 +                                         current_source_index,
 +                                         pcurrent_source, 
 +                                         scalar_channel_index) )     
 +        {
 +            return GL_FALSE;
 +        }
 +   
 +        if (pAsm->D.dst.math == 0) 
 +        {            
 +            // Process source 1            
 +            current_source_index = 1;
 +            pcurrent_source = &(pAsm->S[current_source_index].src);
 +
 +            if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
 +                                             current_source_index,
 +                                             pcurrent_source, 
 +                                             scalar_channel_index) ) 
 +            {
 +                return GL_FALSE;
 +            }
 +        }
 +
 +        //other bits
 +        alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
 +
 +        if(   (is_single_scalar_operation == GL_TRUE) 
 +           || (GL_TRUE == bSplitInst) )
 +        {
 +            alu_instruction_ptr->m_Word0.f.last = 1;
 +        }
 +        else 
 +        {
 +            alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ?  1 : 0;
 +        }
 +
 +        alu_instruction_ptr->m_Word0.f.pred_sel                = 0x0;
 +        alu_instruction_ptr->m_Word1_OP2.f.update_pred         = 0x0;  
 +        alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
 +
 +        // dst
 +        if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || 
 +            (pAsm->D.dst.rtype == DST_REG_OUT) ) 
 +        {
 +            alu_instruction_ptr->m_Word1.f.dst_gpr  = pAsm->D.dst.reg;
 +        }
 +        else 
 +        {
 +            r700_error(ERROR_ASM_ALUDSTBADTYPE, "Only temp destination registers supported for ALU dest regs.");
 +            return GL_FALSE;
 +        }
 +
 +        alu_instruction_ptr->m_Word1.f.dst_rel  = SQ_ABSOLUTE;  //D.rtype
 +
 +        if ( is_single_scalar_operation == GL_TRUE ) 
 +        {
 +            // Override scalar_channel_index since only one scalar value will be written
 +            if(pAsm->D.dst.writex) 
 +            {
 +                scalar_channel_index = 0;
 +            }
 +            else if(pAsm->D.dst.writey) 
 +            {
 +                scalar_channel_index = 1;
 +            }
 +            else if(pAsm->D.dst.writez) 
 +            {
 +                scalar_channel_index = 2;
 +            }
 +            else if(pAsm->D.dst.writew) 
 +            {
 +                scalar_channel_index = 3;
 +            }
 +        }
 +
 +        alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
 +
 +        alu_instruction_ptr->m_Word1.f.clamp    = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
 +
 +        if (pAsm->D.dst.op3) 
 +        {            
 +            //op3
 +
 +            alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
 +
 +            //There's 3rd src for op3
 +            current_source_index = 2;
 +            pcurrent_source = &(pAsm->S[current_source_index].src);
 +
 +            if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
 +                                              current_source_index,
 +                                              pcurrent_source, 
 +                                              scalar_channel_index) ) 
 +            {
 +                return GL_FALSE;
 +            }
 +        }
 +        else 
 +        {
 +            //op2
 +            if (pAsm->bR6xx)
 +            {
 +                alu_instruction_ptr->m_Word1_OP2.f6.alu_inst           = pAsm->D.dst.opcode;
 +
 +                alu_instruction_ptr->m_Word1_OP2.f6.src0_abs           = 0x0;
 +                alu_instruction_ptr->m_Word1_OP2.f6.src1_abs           = 0x0;
 +
 +                //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
 +                //alu_instruction_ptr->m_Word1_OP2.f6.update_pred         = 0x0;
 +                switch (scalar_channel_index) 
 +                {
 +                    case 0: 
 +                        alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex; 
 +                        break;
 +                    case 1: 
 +                        alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey; 
 +                        break;
 +                    case 2: 
 +                        alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez; 
 +                        break;
 +                    case 3: 
 +                        alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew; 
 +                        break;
 +                    default: 
 +                        alu_instruction_ptr->m_Word1_OP2.f6.write_mask = SQ_SEL_MASK; 
 +                        break;
 +                }            
 +                alu_instruction_ptr->m_Word1_OP2.f6.omod               = SQ_ALU_OMOD_OFF;
 +            }
 +            else
 +            {
 +                alu_instruction_ptr->m_Word1_OP2.f.alu_inst           = pAsm->D.dst.opcode;
 +
 +                alu_instruction_ptr->m_Word1_OP2.f.src0_abs           = 0x0;
 +                alu_instruction_ptr->m_Word1_OP2.f.src1_abs           = 0x0;
 +
 +                //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
 +                //alu_instruction_ptr->m_Word1_OP2.f.update_pred         = 0x0;
 +                switch (scalar_channel_index) 
 +                {
 +                    case 0: 
 +                        alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex; 
 +                        break;
 +                    case 1: 
 +                        alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey; 
 +                        break;
 +                    case 2: 
 +                        alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez; 
 +                        break;
 +                    case 3: 
 +                        alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew; 
 +                        break;
 +                    default: 
 +                        alu_instruction_ptr->m_Word1_OP2.f.write_mask = SQ_SEL_MASK; 
 +                        break;
 +                }            
 +                alu_instruction_ptr->m_Word1_OP2.f.omod               = SQ_ALU_OMOD_OFF;
 +            }
 +        }
 +
 +        if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
 +        {
 +            return GL_FALSE;
 +        }
 +
 +        /*
 +         * Judge the type of current instruction, is it vector or scalar 
 +         * instruction.
 +         */        
 +        if (is_single_scalar_operation) 
 +        {
 +            if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
 +            {
 +                return GL_FALSE;
 +            }
 +        }
 +        else 
 +        {
 +            if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
 +            {
 +                return 1;
 +            }
 +        }
 +
 +        contiguous_slots_needed = 0;
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean next_ins(r700_AssemblerBase *pAsm)
 +{
 +    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
 +
 +    if( GL_TRUE == IsTex(pILInst->Opcode) )
 +    {
 +        if( GL_FALSE == assemble_tex_instruction(pAsm) ) 
 +        {
 +            r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling TEX instruction");
 +            return GL_FALSE;
 +        }
 +    }
 +    else 
 +    {   //ALU      
 +        if( GL_FALSE == assemble_alu_instruction(pAsm) ) 
 +        {
 +            r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling ALU instruction");
 +            return GL_FALSE;
 +        }
 +    } 
 +      
 +    if(pAsm->D.dst.rtype == DST_REG_OUT) 
 +    {
 +        if(pAsm->D.dst.op3) 
 +        {        
 +            // There is no mask for OP3 instructions, so all channels are written        
 +            pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
 +        }
 +        else 
 +        {
 +            pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] 
 +               |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
 +        }
 +    }
 +    
 +    //reset for next inst.
 +    pAsm->D.bits    = 0;
 +    pAsm->S[0].bits = 0;
 +    pAsm->S[1].bits = 0;
 +    pAsm->S[2].bits = 0;
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
 +{
 +    BITS tmp;
 +
 +    checkop1(pAsm);
 +
 +    tmp = gethelpr(pAsm);
 +
 +    // opcode  tmp.x,    a.x
 +    // MOV     dst,      tmp.x
 +
 +    pAsm->D.dst.opcode = opcode;
 +    pAsm->D.dst.math = 1;
 +
 +    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
 +    pAsm->D.dst.reg    = tmp;
 +    pAsm->D.dst.writex = 1;
 +
 +    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if ( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    // Now replicate result to all necessary channels in destination
 +    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
 +
 +    if( GL_FALSE == assemble_dst(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +    pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
 +    pAsm->S[0].src.reg   = tmp;
 +
 +    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
 +    noneg_PVSSRC(&(pAsm->S[0].src));
 +
 +    if( GL_FALSE == next_ins(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
 +{
 +    checkop1(pAsm);
 +
 +    pAsm->D.dst.opcode = SQ_OP2_INST_MAX;  
 +
 +    if( GL_FALSE == assemble_dst(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 + 
 +    pAsm->S[1].bits = pAsm->S[0].bits;
 +    flipneg_PVSSRC(&(pAsm->S[1].src));
 +
 +    if ( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
 +{
 +    if( GL_FALSE == checkop2(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
 + 
 +    if( GL_FALSE == assemble_dst(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 + 
 +    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == assemble_src(pAsm, 1, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
 +    {
 +        flipneg_PVSSRC(&(pAsm->S[1].src));
 +    }
 +
 +    if( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_BAD(char *opcode_str) 
 +{
 +    r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction (%s)", opcode_str);
 +    return GL_FALSE;
 +}
 +
 +GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
 +{
 +    int tmp;
 +
 +    if( GL_FALSE == checkop3(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
 +    pAsm->D.dst.op3     = 1;  
 +
 +    tmp = (-1);
 +
 +    if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
 +    {
 +        //OP3 has no support for write mask
 +        tmp = gethelpr(pAsm);
 +
 +        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
 +        pAsm->D.dst.reg   = tmp;
 +
 +        nomask_PVSDST(&(pAsm->D.dst));
 +    }
 +    else 
 +    {
 +        if( GL_FALSE == assemble_dst(pAsm) )
 +        {
 +            return GL_FALSE;
 +        }
 +    }
 +
 +    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +              
 +    if( GL_FALSE == assemble_src(pAsm, 2, 1) )  
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == assemble_src(pAsm, 1, 2) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if ( GL_FALSE == next_ins(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) 
 +    {
 +        if( GL_FALSE == assemble_dst(pAsm) )
 +        {
 +            return GL_FALSE;
 +        }
 +
 +        pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
 +
 +        //tmp for source
 +        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
 +        pAsm->S[0].src.reg   = tmp;
 +
 +        noneg_PVSSRC(&(pAsm->S[0].src));
 +        noswizzle_PVSSRC(&(pAsm->S[0].src));
 +
 +        if( GL_FALSE == next_ins(pAsm) )
 +        {
 +            return GL_FALSE;
 +        }
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_COS(r700_AssemblerBase *pAsm)
 +{
 +    return assemble_math_function(pAsm, SQ_OP2_INST_COS);
 +}
 + 
 +GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
 +{
 +    if( GL_FALSE == checkop2(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 + 
 +    pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;  
 +
 +    if( GL_FALSE == assemble_dst(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == assemble_src(pAsm, 1, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
 +    {
 +        zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
 +        zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
 +    }
 +    else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH) 
 +    {
 +        onecomp_PVSSRC(&(pAsm->S[1].src), 3);
 +    } 
 +
 +    if ( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 + 
 +GLboolean assemble_DST(r700_AssemblerBase *pAsm)
 +{
 +    if( GL_FALSE == checkop2(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
 +
 +    if( GL_FALSE == assemble_dst(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == assemble_src(pAsm, 1, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    onecomp_PVSSRC(&(pAsm->S[0].src), 0);
 +    onecomp_PVSSRC(&(pAsm->S[0].src), 3);
 +
 +    onecomp_PVSSRC(&(pAsm->S[1].src), 0);
 +    onecomp_PVSSRC(&(pAsm->S[1].src), 2);
 +
 +    if ( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
 +{
 +    return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
 +}
 + 
 +GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
 +{
 +    checkop1(pAsm);
 +
 +    pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;  
 +
 +    if ( GL_FALSE == assemble_dst(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if ( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
 +{
 +    return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
 +}
 +
 +GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
 +{
 +    checkop1(pAsm);
 +
 +    pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; 
 +
 +    if ( GL_FALSE == assemble_dst(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if ( GL_FALSE == next_ins(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 + 
 +GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
 +{
 +    checkop1(pAsm);
 +
 +    pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT;  
 +  
 +    if ( GL_FALSE == assemble_dst(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    pAsm->D.dst.writex = 0;
 +    pAsm->D.dst.writey = 0;
 +    pAsm->D.dst.writez = 0;
 +    pAsm->D.dst.writew = 0;
 +
 +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
 +    pAsm->S[0].src.reg = 0;
 +
 +    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
 +    noneg_PVSSRC(&(pAsm->S[0].src));
 +
 +    pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
 +
 +    if(PROGRAM_TEMPORARY == pAsm->pILInst[pAsm->uiCurInst].DstReg.File)
 +    {
 +        pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
 +    }
 +    else
 +    {   //PROGRAM_OUTPUT
 +        pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index];
 +    }
 +  
 +    setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
 +    noswizzle_PVSSRC(&(pAsm->S[1].src));
 +  
 +    if ( GL_FALSE == next_ins(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    pAsm->pR700Shader->killIsUsed = GL_TRUE;
 +    
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_LG2(r700_AssemblerBase *pAsm) 
 +{ 
 +    return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
 +}
 +
 +GLboolean assemble_LRP(r700_AssemblerBase *pAsm) 
 +{
 +    BITS tmp;
 +
 +    if( GL_FALSE == checkop3(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    tmp = gethelpr(pAsm);
 +
 +    pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
 +
 +    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
 +    pAsm->D.dst.reg   = tmp;
 +    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +    nomask_PVSDST(&(pAsm->D.dst));
 +
 +          
 +    if( GL_FALSE == assemble_src(pAsm, 1, 0) ) 
 +    {
 +          return GL_FALSE;
 +    }
 +
 +    if ( GL_FALSE == assemble_src(pAsm, 2, 1) )   
 +    {
 +          return GL_FALSE;
 +    }
 +
 +    neg_PVSSRC(&(pAsm->S[1].src));
 +
 +    if( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +          return GL_FALSE;
 +    }
 +
 +    pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
 +    pAsm->D.dst.op3    = 1;
 +
 +    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
 +    pAsm->D.dst.reg = tmp;
 +    nomask_PVSDST(&(pAsm->D.dst));
 +    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +
 +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
 +    pAsm->S[0].src.reg = tmp;
 +    noswizzle_PVSSRC(&(pAsm->S[0].src));
 +
 +
 +    if( GL_FALSE == assemble_src(pAsm, 0, 1) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +    if( GL_FALSE == assemble_src(pAsm, 2, -1) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
 +
 +    if( GL_FALSE == assemble_dst(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
 +    pAsm->S[0].src.reg = tmp;
 +    noswizzle_PVSSRC(&(pAsm->S[0].src));
 +
 +    if( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm) 
 +{
 +    int tmp, ii;
 +    GLboolean bReplaceDst = GL_FALSE;
 +    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
 +
 +      if( GL_FALSE == checkop3(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +      pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;  
 +      pAsm->D.dst.op3     = 1; 
 +
 +      tmp = (-1);
 +
 +    if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
 +    {   /* TODO : more investigation on MAD src and dst using same register */
 +        for(ii=0; ii<3; ii++)
 +        {
 +            if(   (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
 +               && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
 +            {
 +                bReplaceDst = GL_TRUE;
 +                break;
 +            }
 +        }
 +    }
 +    if(0xF != pILInst->DstReg.WriteMask)
 +    {   /* OP3 has no support for write mask */
 +        bReplaceDst = GL_TRUE;
 +    }
 +
 +      if(GL_TRUE == bReplaceDst)
 +    {
 +        tmp = gethelpr(pAsm);
 +
 +        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
 +        pAsm->D.dst.reg   = tmp;
 +
 +        nomask_PVSDST(&(pAsm->D.dst));
 +    }
 +    else 
 +    {
 +        if( GL_FALSE == assemble_dst(pAsm) )
 +        {
 +            return GL_FALSE;
 +        }
 +    }
 +
 +      if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +              
 +    if( GL_FALSE == assemble_src(pAsm, 1, -1) )  
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == assemble_src(pAsm, 2, -1) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if ( GL_FALSE == next_ins(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +      if (GL_TRUE == bReplaceDst) 
 +    {
 +        if( GL_FALSE == assemble_dst(pAsm) )
 +        {
 +            return GL_FALSE;
 +        }
 +
 +        pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
 +
 +        //tmp for source
 +        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
 +        pAsm->S[0].src.reg   = tmp;
 +
 +        noneg_PVSSRC(&(pAsm->S[0].src));
 +        noswizzle_PVSSRC(&(pAsm->S[0].src));
 +
 +        if( GL_FALSE == next_ins(pAsm) )
 +        {
 +            return GL_FALSE;
 +        }
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +/* LIT dst, src */
 +GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
 +{
 +    unsigned int dstReg;
 +    unsigned int dstType;
 +    unsigned int srcReg;
 +    unsigned int srcType;
 +    checkop1(pAsm);
 +    int tmp = gethelpr(pAsm);
 +
 +    if( GL_FALSE == assemble_dst(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +    dstReg  = pAsm->D.dst.reg;
 +    dstType = pAsm->D.dst.rtype;
 +    srcReg  = pAsm->S[0].src.reg;
 +    srcType = pAsm->S[0].src.rtype;
 +
 +    /* dst.xw, <- 1.0  */
 +    pAsm->D.dst.opcode   = SQ_OP2_INST_MOV;
 +    pAsm->D.dst.rtype    = dstType;
 +    pAsm->D.dst.reg      = dstReg;
 +    pAsm->D.dst.writex   = 1;
 +    pAsm->D.dst.writey   = 0;
 +    pAsm->D.dst.writez   = 0;
 +    pAsm->D.dst.writew   = 1;
 +    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
 +    pAsm->S[0].src.reg   = tmp;
 +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +    noneg_PVSSRC(&(pAsm->S[0].src));
 +    pAsm->S[0].src.swizzlex = SQ_SEL_1;
 +    pAsm->S[0].src.swizzley = SQ_SEL_1;
 +    pAsm->S[0].src.swizzlez = SQ_SEL_1;
 +    pAsm->S[0].src.swizzlew = SQ_SEL_1;
 +    if( GL_FALSE == next_ins(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    /* dst.y = max(src.x, 0.0) */
 +    pAsm->D.dst.opcode   = SQ_OP2_INST_MAX;
 +    pAsm->D.dst.rtype    = dstType;
 +    pAsm->D.dst.reg      = dstReg;
 +    pAsm->D.dst.writex   = 0;
 +    pAsm->D.dst.writey   = 1;
 +    pAsm->D.dst.writez   = 0;
 +    pAsm->D.dst.writew   = 0;
 +    pAsm->S[0].src.rtype = srcType;
 +    pAsm->S[0].src.reg   = srcReg;
 +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +    noneg_PVSSRC(&(pAsm->S[0].src));
 +    pAsm->S[0].src.swizzlex = SQ_SEL_X;
 +    pAsm->S[0].src.swizzley = SQ_SEL_X;
 +    pAsm->S[0].src.swizzlez = SQ_SEL_X;
 +    pAsm->S[0].src.swizzlew = SQ_SEL_X;
 +    pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
 +    pAsm->S[1].src.reg   = tmp;
 +    setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
 +    noneg_PVSSRC(&(pAsm->S[1].src));
 +    pAsm->S[1].src.swizzlex = SQ_SEL_0;
 +    pAsm->S[1].src.swizzley = SQ_SEL_0;
 +    pAsm->S[1].src.swizzlez = SQ_SEL_0;
 +    pAsm->S[1].src.swizzlew = SQ_SEL_0;
 +    if( GL_FALSE == next_ins(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    /* dst.z = log(src.y) */
 +    pAsm->D.dst.opcode   = SQ_OP2_INST_LOG_CLAMPED;
 +    pAsm->D.dst.math     = 1;
 +    pAsm->D.dst.rtype    = dstType;
 +    pAsm->D.dst.reg      = dstReg;
 +    pAsm->D.dst.writex   = 0;
 +    pAsm->D.dst.writey   = 0;
 +    pAsm->D.dst.writez   = 1;
 +    pAsm->D.dst.writew   = 0;
 +    pAsm->S[0].src.rtype = srcType;
 +    pAsm->S[0].src.reg   = srcReg;
 +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +    noneg_PVSSRC(&(pAsm->S[0].src));
 +    pAsm->S[0].src.swizzlex = SQ_SEL_Y;
 +    pAsm->S[0].src.swizzley = SQ_SEL_Y;
 +    pAsm->S[0].src.swizzlez = SQ_SEL_Y;
 +    pAsm->S[0].src.swizzlew = SQ_SEL_Y;
 +    if( GL_FALSE == next_ins(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    /* dst.w = MUL_LIT(src.w, dst.z, src.x ) */
 +    pAsm->D.dst.opcode   = SQ_OP3_INST_MUL_LIT;
 +    pAsm->D.dst.op3      = 1;
 +    pAsm->D.dst.rtype    = dstType;
 +    pAsm->D.dst.reg      = dstReg;
 +    pAsm->D.dst.writex   = 0;
 +    pAsm->D.dst.writey   = 0;
 +    pAsm->D.dst.writez   = 0;
 +    pAsm->D.dst.writew   = 1;
 +
 +    pAsm->S[0].src.rtype = srcType;
 +    pAsm->S[0].src.reg   = srcReg;
 +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +    noneg_PVSSRC(&(pAsm->S[0].src));
 +    pAsm->S[0].src.swizzlex = SQ_SEL_W;
 +    pAsm->S[0].src.swizzley = SQ_SEL_W;
 +    pAsm->S[0].src.swizzlez = SQ_SEL_W;
 +    pAsm->S[0].src.swizzlew = SQ_SEL_W;
 +
 +    pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
 +    pAsm->S[1].src.reg   = dstReg;
 +    setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
 +    noneg_PVSSRC(&(pAsm->S[1].src));
 +    pAsm->S[1].src.swizzlex = SQ_SEL_Z;
 +    pAsm->S[1].src.swizzley = SQ_SEL_Z;
 +    pAsm->S[1].src.swizzlez = SQ_SEL_Z;
 +    pAsm->S[1].src.swizzlew = SQ_SEL_Z;
 +
 +    pAsm->S[2].src.rtype = srcType;
 +    pAsm->S[2].src.reg   = srcReg;
 +    setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
 +    noneg_PVSSRC(&(pAsm->S[2].src));
 +    pAsm->S[2].src.swizzlex = SQ_SEL_X;
 +    pAsm->S[2].src.swizzley = SQ_SEL_X;
 +    pAsm->S[2].src.swizzlez = SQ_SEL_X;
 +    pAsm->S[2].src.swizzlew = SQ_SEL_X;
 +
 +    if( GL_FALSE == next_ins(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    /* dst.z = exp(dst.z) */
 +    pAsm->D.dst.opcode   = SQ_OP2_INST_EXP_IEEE;
 +    pAsm->D.dst.math     = 1;
 +    pAsm->D.dst.rtype    = dstType;
 +    pAsm->D.dst.reg      = dstReg;
 +    pAsm->D.dst.writex   = 0;
 +    pAsm->D.dst.writey   = 0;
 +    pAsm->D.dst.writez   = 1;
 +    pAsm->D.dst.writew   = 0;
 +
 +    pAsm->S[0].src.rtype = dstType;
 +    pAsm->S[0].src.reg   = dstReg;
 +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +    noneg_PVSSRC(&(pAsm->S[0].src));
 +    pAsm->S[0].src.swizzlex = SQ_SEL_Z;
 +    pAsm->S[0].src.swizzley = SQ_SEL_Z;
 +    pAsm->S[0].src.swizzlez = SQ_SEL_Z;
 +    pAsm->S[0].src.swizzlew = SQ_SEL_Z;
 +
 +    if( GL_FALSE == next_ins(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 + 
 +GLboolean assemble_MAX(r700_AssemblerBase *pAsm) 
 +{
 +      if( GL_FALSE == checkop2(pAsm) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      pAsm->D.dst.opcode = SQ_OP2_INST_MAX; 
 +      
 +      if( GL_FALSE == assemble_dst(pAsm) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      if( GL_FALSE == assemble_src(pAsm, 1, -1) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      if( GL_FALSE == next_ins(pAsm) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +    return GL_TRUE;
 +}
 + 
 +GLboolean assemble_MIN(r700_AssemblerBase *pAsm) 
 +{
 +      if( GL_FALSE == checkop2(pAsm) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      pAsm->D.dst.opcode = SQ_OP2_INST_MIN;  
 +
 +      if( GL_FALSE == assemble_dst(pAsm) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      if( GL_FALSE == assemble_src(pAsm, 1, -1) )
 +      {
 +              return GL_FALSE;
 +      }
 + 
 +      if( GL_FALSE == next_ins(pAsm) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +    return GL_TRUE;
 +}
 + 
 +GLboolean assemble_MOV(r700_AssemblerBase *pAsm) 
 +{
 +    checkop1(pAsm);
 +
 +    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
 +
 +    if (GL_FALSE == assemble_dst(pAsm))
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if (GL_FALSE == assemble_src(pAsm, 0, -1))
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if ( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 + 
 +GLboolean assemble_MUL(r700_AssemblerBase *pAsm) 
 +{
 +      if( GL_FALSE == checkop2(pAsm) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
 +
 +      if( GL_FALSE == assemble_dst(pAsm) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      if( GL_FALSE == assemble_src(pAsm, 1, -1) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      if( GL_FALSE == next_ins(pAsm) ) 
 +      {
 +              return GL_FALSE;
 +      }
 +
 +    return GL_TRUE;
 +}
 + 
 +GLboolean assemble_POW(r700_AssemblerBase *pAsm) 
 +{
 +    BITS tmp;
 +
 +    checkop1(pAsm);
 +
 +    tmp = gethelpr(pAsm);
 +
 +    // LG2 tmp.x,     a.swizzle
 +    pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;  
 +    pAsm->D.dst.math = 1;
 +
 +    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
 +    pAsm->D.dst.reg   = tmp;
 +    nomask_PVSDST(&(pAsm->D.dst));
 +
 +    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    // MUL tmp.x,     tmp.x, b.swizzle
 +    pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
 +
 +    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
 +    pAsm->D.dst.reg = tmp;
 +    nomask_PVSDST(&(pAsm->D.dst));
 +
 +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
 +    pAsm->S[0].src.reg = tmp;
 +    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
 +    noneg_PVSSRC(&(pAsm->S[0].src));
 +
 +    if( GL_FALSE == assemble_src(pAsm, 1, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    // EX2 dst.mask,          tmp.x
 +    // EX2 tmp.x,             tmp.x
 +    pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
 +    pAsm->D.dst.math = 1;
 +
 +    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
 +    pAsm->D.dst.reg = tmp;
 +    nomask_PVSDST(&(pAsm->D.dst));
 +
 +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
 +    pAsm->S[0].src.reg = tmp;
 +    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
 +    noneg_PVSSRC(&(pAsm->S[0].src));
 +
 +    if( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    // Now replicate result to all necessary channels in destination
 +    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
 +
 +    if( GL_FALSE == assemble_dst(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +    pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
 +    pAsm->S[0].src.reg   = tmp;
 +
 +    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
 +    noneg_PVSSRC(&(pAsm->S[0].src));
 +
 +    if( GL_FALSE == next_ins(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 + 
 +GLboolean assemble_RCP(r700_AssemblerBase *pAsm) 
 +{
 +    return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
 +}
 + 
 +GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) 
 +{
 +    return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
 +}
 + 
 +GLboolean assemble_SIN(r700_AssemblerBase *pAsm) 
 +{
 +    return assemble_math_function(pAsm, SQ_OP2_INST_SIN);
 +}
 + 
 +GLboolean assemble_SCS(r700_AssemblerBase *pAsm) 
 +{
 +    BITS tmp;
 +
 +      checkop1(pAsm);
 +
 +      tmp = gethelpr(pAsm);
 +
 +      // COS tmp.x,    a.x
 +      pAsm->D.dst.opcode = SQ_OP2_INST_COS;
 +      pAsm->D.dst.math = 1;
 +
 +      setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +      pAsm->D.dst.rtype = DST_REG_TEMPORARY;
 +      pAsm->D.dst.reg = tmp;
 +      pAsm->D.dst.writex = 1;
 +
 +      if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      if ( GL_FALSE == next_ins(pAsm) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      // SIN tmp.y,    a.x
 +      pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
 +      pAsm->D.dst.math = 1;
 +
 +      setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +      pAsm->D.dst.rtype = DST_REG_TEMPORARY;
 +      pAsm->D.dst.reg = tmp;
 +      pAsm->D.dst.writey = 1;
 +
 +      if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      if( GL_FALSE == next_ins(pAsm) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      // MOV dst.mask,     tmp
 +      pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
 +
 +      if( GL_FALSE == assemble_dst(pAsm) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +      setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +      pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
 +      pAsm->S[0].src.reg = tmp;
 +
 +      noswizzle_PVSSRC(&(pAsm->S[0].src));
 +      pAsm->S[0].src.swizzlez = SQ_SEL_0;
 +      pAsm->S[0].src.swizzlew = SQ_SEL_0;
 +
 +      if ( GL_FALSE == next_ins(pAsm) )
 +      {
 +              return GL_FALSE;
 +      }
 +
 +    return GL_TRUE;
 +}
 + 
 +GLboolean assemble_SGE(r700_AssemblerBase *pAsm) 
 +{
 +    if( GL_FALSE == checkop2(pAsm) )
 +    {
 +          return GL_FALSE;
 +    }
 +
 +    pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;  
 +
 +    if( GL_FALSE == assemble_dst(pAsm) )
 +    {
 +          return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +    {
 +          return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == assemble_src(pAsm, 1, -1) )
 +    {
 +          return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +          return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 + 
 +GLboolean assemble_SLT(r700_AssemblerBase *pAsm) 
 +{
 +    if( GL_FALSE == checkop2(pAsm) )
 +    {
 +          return GL_FALSE;
 +    }
 +
 +    pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;  
 +
 +    if( GL_FALSE == assemble_dst(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +                
 +    if( GL_FALSE == assemble_src(pAsm, 0, 1) )  
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == assemble_src(pAsm, 1, 0) )  
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == next_ins(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 + 
 +GLboolean assemble_STP(r700_AssemblerBase *pAsm) 
 +{
 +    return GL_TRUE;
 +}
 + 
 +GLboolean assemble_TEX(r700_AssemblerBase *pAsm) 
 +{
 +    GLboolean src_const;
 +
 +    switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
 +    {
 +    case PROGRAM_CONSTANT:
 +    case PROGRAM_LOCAL_PARAM:
 +    case PROGRAM_ENV_PARAM:
 +    case PROGRAM_STATE_VAR:
 +        src_const = GL_TRUE;
 +    case PROGRAM_TEMPORARY:
 +    case PROGRAM_INPUT:
 +        src_const = GL_FALSE;
 +    }
 +
 +    if (GL_TRUE == src_const) 
 +    {
 +        r700_error(TODO_ASM_CONSTTEXADDR, "TODO: Texture coordinates from a constant register not supported.");
 +        return GL_FALSE;
 +    }
 +
 +    switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) 
 +    {
 +        case OPCODE_TEX:
 +            pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;            
 +            break;
 +        case OPCODE_TXB:            
 +            r700_error(TODO_ASM_TXB, "do not support TXB yet");
 +            return GL_FALSE;
 +            break;
 +        case OPCODE_TXP:            
 +            /* TODO : tex proj version : divid first 3 components by 4th */ 
 +            pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
 +            break;
 +        default:
 +            r700_error(ERROR_ASM_BADTEXINST, "Internal error: bad texture op (not TEX)");
 +            return GL_FALSE;
 +            break;
 +    }
 +
 +    // Set src1 to tex unit id
 +    pAsm->S[1].src.reg   = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit;
 +    pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
 +
 +    //No sw info from mesa compiler, so hard code here.
 +    pAsm->S[1].src.swizzlex = SQ_SEL_X;
 +    pAsm->S[1].src.swizzley = SQ_SEL_Y;
 +    pAsm->S[1].src.swizzlez = SQ_SEL_Z;
 +    pAsm->S[1].src.swizzlew = SQ_SEL_W;
 +
 +    if( GL_FALSE == tex_dst(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 + 
 +    if( GL_FALSE == tex_src(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if ( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_XPD(r700_AssemblerBase *pAsm) 
 +{
 +    BITS tmp;
 +
 +    if( GL_FALSE == checkop2(pAsm) )
 +    {
 +          return GL_FALSE;
 +    }
 +
 +    tmp = gethelpr(pAsm);
 +
 +    pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
 +
 +    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
 +    pAsm->D.dst.reg   = tmp;
 +    nomask_PVSDST(&(pAsm->D.dst));
 +  
 +    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == assemble_src(pAsm, 1, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 + 
 +    swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
 +    swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
 +
 +    if( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
 +    pAsm->D.dst.op3    = 1;
 +
 +    if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
 +    {
 +        tmp = gethelpr(pAsm);
 +
 +        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
 +        pAsm->D.dst.reg   = tmp;
 +
 +        nomask_PVSDST(&(pAsm->D.dst));
 +    }
 +    else 
 +    {
 +        if( GL_FALSE == assemble_dst(pAsm) )
 +        {
 +            return GL_FALSE;
 +        }
 +    }
 +
 +    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    if( GL_FALSE == assemble_src(pAsm, 1, -1) )
 +    {
 +        return GL_FALSE;
 +    }
 + 
 +    swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
 +    swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
 +
 +    // result1 + (neg) result0
 +    setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
 +    pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
 +    pAsm->S[2].src.reg   = tmp;
 +
 +    neg_PVSSRC(&(pAsm->S[2].src));
 +    noswizzle_PVSSRC(&(pAsm->S[2].src));
 +
 +    if( GL_FALSE == next_ins(pAsm) ) 
 +    {
 +        return GL_FALSE;
 +    }
 +
 +
 +    if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) 
 +    {
 +        if( GL_FALSE == assemble_dst(pAsm) )
 +        {
 +            return GL_FALSE;
 +        }
 +
 +        pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
 +
 +        // Use tmp as source
 +        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
 +        pAsm->S[0].src.reg   = tmp;
 +
 +        noneg_PVSSRC(&(pAsm->S[0].src));
 +        noswizzle_PVSSRC(&(pAsm->S[0].src));
 +
 +        if( GL_FALSE == next_ins(pAsm) )
 +        {
 +            return GL_FALSE;
 +        }
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
 +{
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_IF(r700_AssemblerBase *pAsm)
 +{
 +    return GL_TRUE;
 +}
 +
 +GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
 +{
 +    return GL_TRUE;
 +}
 +
 +GLboolean AssembleInstr(GLuint uiNumberInsts,
 +                        struct prog_instruction *pILInst, 
 +                                              r700_AssemblerBase *pR700AsmCode)
 +{
 +    GLuint i;
 +
 +    pR700AsmCode->pILInst = pILInst;
 +      for(i=0; i<uiNumberInsts; i++)
 +    {
 +        pR700AsmCode->uiCurInst = i;
 +
 +        switch (pILInst[i].Opcode)
 +        {
 +        case OPCODE_ABS: 
 +            if ( GL_FALSE == assemble_ABS(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +        case OPCODE_ADD: 
 +        case OPCODE_SUB: 
 +            if ( GL_FALSE == assemble_ADD(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +
 +        case OPCODE_ARL: 
 +            r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ARL ");
 +            //if ( GL_FALSE == assemble_BAD("ARL") ) 
 +                return GL_FALSE;
 +            break;
 +        case OPCODE_ARR: 
 +            r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ARR ");
 +            //if ( GL_FALSE == assemble_BAD("ARR") ) 
 +                return GL_FALSE;
 +            break;
 +
 +        case OPCODE_CMP: 
 +            if ( GL_FALSE == assemble_CMP(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +        case OPCODE_COS: 
 +            if ( GL_FALSE == assemble_COS(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +
 +        case OPCODE_DP3: 
 +        case OPCODE_DP4: 
 +        case OPCODE_DPH: 
 +            if ( GL_FALSE == assemble_DOT(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +
 +        case OPCODE_DST: 
 +            if ( GL_FALSE == assemble_DST(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +
 +        case OPCODE_EX2: 
 +            if ( GL_FALSE == assemble_EX2(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +        case OPCODE_EXP: 
 +            r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_EXP ");
 +            //if ( GL_FALSE == assemble_BAD("EXP") ) 
 +                return GL_FALSE;
 +            break; // approx of EX2
 +
 +        case OPCODE_FLR:     
 +            if ( GL_FALSE == assemble_FLR(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +        //case OP_FLR_INT: 
 +        //    if ( GL_FALSE == assemble_FLR_INT() ) 
 +        //        return GL_FALSE;
 +        //    break;  
 +
 +        case OPCODE_FRC: 
 +            if ( GL_FALSE == assemble_FRC(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +
 +        case OPCODE_KIL: 
 +            if ( GL_FALSE == assemble_KIL(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;
 +        case OPCODE_LG2: 
 +            if ( GL_FALSE == assemble_LG2(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +        case OPCODE_LIT:
 +            if ( GL_FALSE == assemble_LIT(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;
 +        case OPCODE_LRP: 
 +            if ( GL_FALSE == assemble_LRP(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +        case OPCODE_LOG: 
 +            r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_LOG ");
 +            //if ( GL_FALSE == assemble_BAD("LOG") ) 
 +                return GL_FALSE;
 +            break; // approx of LG2
 +
 +        case OPCODE_MAD: 
 +            if ( GL_FALSE == assemble_MAD(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +        case OPCODE_MAX: 
 +            if ( GL_FALSE == assemble_MAX(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +        case OPCODE_MIN: 
 +            if ( GL_FALSE == assemble_MIN(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +
 +        case OPCODE_MOV: 
 +            if ( GL_FALSE == assemble_MOV(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +        case OPCODE_MUL: 
 +            if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break; 
 +
 +        case OPCODE_POW: 
 +            if ( GL_FALSE == assemble_POW(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +        case OPCODE_RCP: 
 +            if ( GL_FALSE == assemble_RCP(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +        case OPCODE_RSQ: 
 +            if ( GL_FALSE == assemble_RSQ(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +        case OPCODE_SIN: 
 +            if ( GL_FALSE == assemble_SIN(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +        case OPCODE_SCS: 
 +            if ( GL_FALSE == assemble_SCS(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +
 +        case OPCODE_SGE: 
 +            if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break; 
 +        case OPCODE_SLT: 
 +            if ( GL_FALSE == assemble_SLT(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break; 
 +
 +        //case OP_STP: 
 +        //    if ( GL_FALSE == assemble_STP(pR700AsmCode) ) 
 +        //        return GL_FALSE;
 +        //    break;
 +
 +        case OPCODE_SWZ: 
 +            if ( GL_FALSE == assemble_MOV(pR700AsmCode) ) 
 +            {
 +                return GL_FALSE; 
 +            }
 +            else
 +            {
 +                if( (i+1)<uiNumberInsts )
 +                {
 +                    if(OPCODE_END != pILInst[i+1].Opcode)
 +                    {
 +                        if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
 +                        {
 +                            pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
 +                        }
 +                    }
 +                }
 +            }
 +            break;
 +
 +        case OPCODE_TEX: 
 +        case OPCODE_TXB:  
 +        case OPCODE_TXP: 
 +            if ( GL_FALSE == assemble_TEX(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;
 +
 +        case OPCODE_XPD: 
 +            if ( GL_FALSE == assemble_XPD(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;  
 +
 +        case OPCODE_IF   : 
 +            if ( GL_FALSE == assemble_IF(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;
 +        case OPCODE_ELSE : 
 +            r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ELSE ");
 +            //if ( GL_FALSE == assemble_BAD("ELSE") ) 
 +                return GL_FALSE;
 +            break;
 +        case OPCODE_ENDIF: 
 +            if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) ) 
 +                return GL_FALSE;
 +            break;
 +
 +        //case OPCODE_EXPORT: 
 +        //    if ( GL_FALSE == assemble_EXPORT() ) 
 +        //        return GL_FALSE;
 +        //    break;
 +
 +        case OPCODE_END: 
 +                      //pR700AsmCode->uiCurInst = i;
 +                      //This is to remaind that if in later exoort there is depth/stencil
 +                      //export, we need a mov to re-arrange DST channel, where using a
 +                      //psuedo inst, we will use this end inst to do it.
 +            return GL_TRUE;
 +
 +        default:
 +            r700_error(ERROR_ASM_UNKNOWNILINST, "internal: unknown instruction");
 +            return GL_FALSE;
 +        }
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean Process_Export(r700_AssemblerBase* pAsm,
 +                         GLuint type,
 +                         GLuint export_starting_index,
 +                         GLuint export_count, 
 +                         GLuint starting_register_number,
 +                         GLboolean is_depth_export)
 +{
 +    unsigned char ucWriteMask;
 +
 +    check_current_clause(pAsm, CF_EMPTY_CLAUSE);
 +    check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
 +
 +    pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
 +
 +    switch (type) 
 +    {
 +        case SQ_EXPORT_PIXEL:
 +            if(GL_TRUE == is_depth_export) 
 +            {
 +                pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base  = SQ_CF_PIXEL_Z;
 +            }
 +            else 
 +            {
 +                pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base  = SQ_CF_PIXEL_MRT0 + export_starting_index;
 +            }
 +            break;
 +
 +        case SQ_EXPORT_POS:
 +            pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base  = SQ_CF_POS_0 + export_starting_index; 
 +            break;
 +
 +        case SQ_EXPORT_PARAM:
 +            pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base  = 0x0 + export_starting_index; 
 +            break;
 +
 +        default:
 +            r700_error(ERROR_ASM_BADEXPORTTYPE, "Unknown export type: %d", type);
 +            return GL_FALSE;
 +            break;
 +    }
 +
 +    pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr      = starting_register_number;
 +
 +    pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel      = SQ_ABSOLUTE;
 +    pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr   = 0x0;
 +    pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size   = 0x3; 
 +
 +    pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count      = (export_count - 1);
 +    pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program   = 0x0;
 +    pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
 +    pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_EXPORT;  // _DONE
 +    pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
 +    pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier          = 0x1;
 +
 +    if (export_count == 1) 
 +    {
 +        ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
 +
 +        if( (ucWriteMask & 0x1) != 0)
 +        {
 +            pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
 +        }
 +        else
 +        {
 +            pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK;
 +        }
 +        if( ((ucWriteMask>>1) & 0x1) != 0)
 +        {
 +            pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
 +        }
 +        else
 +        {
 +            pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
 +        }
 +        if( ((ucWriteMask>>2) & 0x1) != 0)
 +        {
 +            pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
 +        }
 +        else
 +        {
 +            pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
 +        }
 +        if( ((ucWriteMask>>3) & 0x1) != 0)
 +        {
 +            pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
 +        }
 +        else
 +        {
 +            pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
 +        }
 +    }
 +    else 
 +    {
 +        // This should only be used if all components for all registers have been written
 +        pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
 +        pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
 +        pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
 +        pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
 +    }
 +
 +    pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
 +{
 +      gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END
 +    pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
 +
 +    // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
 +
 +    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
 +
 +    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
 +    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
 +    pAsm->D.dst.reg   = pAsm->depth_export_register_number;
 +
 +    pAsm->D.dst.writex = 1;   // depth          goes in R channel for HW                       
 +
 +    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
 +    pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
 +    pAsm->S[0].src.reg   = pAsm->depth_export_register_number;
 +
 +    setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
 +
 +    noneg_PVSSRC(&(pAsm->S[0].src));
 +
 +    if( GL_FALSE == next_ins(pAsm) )
 +    {
 +        return GL_FALSE;
 +    }
 +
 +    pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
 +
 +    return GL_TRUE;
 +}
 + 
 +GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
 +                                   GLbitfield          OutputsWritten)  
 +{ 
 +    unsigned int unBit;
 +
 +    if(pR700AsmCode->depth_export_register_number >= 0) 
 +    {
 +        if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) )  // depth
 +              {
 +                      return GL_FALSE;
 +              }
 +    }
 +
 +    unBit = 1 << FRAG_RESULT_COLOR;
 +      if(OutputsWritten & unBit)
 +      {
 +              if( GL_FALSE == Process_Export(pR700AsmCode,
 +                                       SQ_EXPORT_PIXEL, 
 +                                       0, 
 +                                       1, 
 +                                       pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLOR], 
 +                                       GL_FALSE) ) 
 +        {
 +            return GL_FALSE;
 +        }
 +      }
 +      unBit = 1 << FRAG_RESULT_DEPTH;
 +      if(OutputsWritten & unBit)
 +      {
 +        if( GL_FALSE == Process_Export(pR700AsmCode,
 +                                       SQ_EXPORT_PIXEL, 
 +                                       0, 
 +                                       1, 
 +                                       pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPTH], 
 +                                       GL_TRUE)) 
 +        {
 +            return GL_FALSE;
 +        }
 +      }
 +
 +    if(pR700AsmCode->cf_last_export_ptr != NULL) 
 +    {
 +        pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst        = SQ_CF_INST_EXPORT_DONE;
 +        pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
 +    }
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
 +                                 GLbitfield          OutputsWritten)  
 +{
 +    unsigned int unBit;
 +    unsigned int i;
 +
 +    GLuint export_starting_index  = 0;
 +    GLuint export_count           = pR700AsmCode->number_of_exports;
 +
 +    unBit = 1 << VERT_RESULT_HPOS;
 +      if(OutputsWritten & unBit)
 +      {
 +        if( GL_FALSE == Process_Export(pR700AsmCode, 
 +                                       SQ_EXPORT_POS, 
 +                                       export_starting_index, 
 +                                       1, 
 +                                       pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
 +                                       GL_FALSE) )
 +        {
 +            return GL_FALSE;
 +        }
 +
 +        export_count--;
 +
 +        pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
 +      }
 +
 +    pR700AsmCode->number_of_exports = export_count;
 +
 +      unBit = 1 << VERT_RESULT_COL0;
 +      if(OutputsWritten & unBit)
 +      {
 +        if( GL_FALSE == Process_Export(pR700AsmCode, 
 +                                       SQ_EXPORT_PARAM, 
 +                                       export_starting_index, 
 +                                       1, 
 +                                       pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
 +                                       GL_FALSE) )
 +        {
 +            return GL_FALSE;
 +        }
 +
 +        export_starting_index++;
 +      }
 +
 +      unBit = 1 << VERT_RESULT_COL1;
 +      if(OutputsWritten & unBit)
 +      {
 +        if( GL_FALSE == Process_Export(pR700AsmCode, 
 +                                       SQ_EXPORT_PARAM, 
 +                                       export_starting_index, 
 +                                       1, 
 +                                       pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
 +                                       GL_FALSE) )
 +        {
 +            return GL_FALSE;
 +        }
 +
 +        export_starting_index++;
 +      }
 +
 +      for(i=0; i<8; i++)
 +      {
 +              unBit = 1 << (VERT_RESULT_TEX0 + i);
 +              if(OutputsWritten & unBit)
 +              {
 +            if( GL_FALSE == Process_Export(pR700AsmCode,
 +                                          SQ_EXPORT_PARAM, 
 +                                          export_starting_index, 
 +                                          1, 
 +                                          pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
 +                                          GL_FALSE) )
 +            {
 +                return GL_FALSE;
 +            }
 +
 +            export_starting_index++;
 +              }
 +      }
 +
 +    // At least one param should be exported
 +    if (export_count) 
 +    {
 +        pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;    
 +    }
 +    else
 +    {
 +        if( GL_FALSE == Process_Export(pR700AsmCode,
 +                                       SQ_EXPORT_PARAM, 
 +                                       0, 
 +                                       1, 
 +                                       pR700AsmCode->starting_export_register_number,
 +                                       GL_FALSE) )
 +        {
 +            return GL_FALSE;
 +        }
 +      
 +        pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
 +        pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
 +        pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
 +        pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
 +        pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
 +    }
 +
 +    pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
 +
 +    return GL_TRUE;
 +}
 +
 +GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
 +{
 +    FREE(pR700AsmCode->pucOutMask);
 +    FREE(pR700AsmCode->pInstDeps);
 +    return GL_TRUE;
 +}
 +
index b62aa66eb290db079c02ede45428a93e53a7b284,0000000000000000000000000000000000000000..f142aafd03e4874a164e2c62735331426ddbcc6d
mode 100644,000000..100644
--- /dev/null
@@@ -1,127 -1,0 +1,127 @@@
-     __DRIdrawablePrivate *dPriv = context->radeon.dri.drawable;
 +/*
 + * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice shall be included
 + * in all copies or substantial portions of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 + */
 +
 +/*
 + * Authors:
 + *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
 + *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
 + */
 + 
 +#include "main/glheader.h"
 +#include "main/context.h"
 +#include "main/macros.h"
 +#include "main/imports.h"
 +#include "main/mtypes.h"
 +#include "main/enums.h"
 +
 +#include "r600_context.h"
 +
 +#include "r700_shaderinst.h"
 +#include "r600_emit.h"
 +
 +/**
 + * Placeholder for a hardware fast-clear path.
 + *
 + * Not implemented: both arguments are ignored.
 + *
 + * \return always GL_FALSE; r700Clear() only stops early on GL_TRUE.
 + */
 +static GLboolean r700ClearFast(context_t *context, GLbitfield mask)
 +{
 +    /* TODO: fast clear still needs to be implemented. */
 +    return GL_FALSE;
 +}
 +
 +/**
 + * Clear the buffers selected by \p mask by forwarding to radeon_clear_tris().
 + */
 +static void r700UserClear(GLcontext *ctx, GLuint mask)
 +{
 +      radeon_clear_tris(ctx, mask);
 +}
 +
 +/*
 + * Call the context's dma.flush hook, if one is installed, passing it the
 + * context's glCtx.
 + *
 + * The argument is expanded without parentheses and more than once, so pass
 + * a plain pointer variable, not an expression.
 + */
 +#define R600_NEWPRIM( rmesa )                 \
 +  do {                                                \
 +  if ( rmesa->radeon.dma.flush )                      \
 +    rmesa->radeon.dma.flush( rmesa->radeon.glCtx );   \
 +  } while (0)
 +
 +/**
 + * Clear the buffers selected by \p mask.
 + *
 + * Currently disabled: the function returns right after its local
 + * declarations (see the XXX FIXME below), so none of the remaining code
 + * runs.  The unreachable part would
 + *  - stop early if r700ClearFast() reports success,
 + *  - flush pending work through R600_NEWPRIM(),
 + *  - collect in tri_mask the color buffers (only when every color-mask bit
 + *    is set) plus the requested depth and stencil buffers,
 + *  - drop from tri_mask every buffer whose renderbuffer has ClassID == 0,
 + *  - clear tri_mask with r700UserClear() and everything else in \p mask
 + *    with _swrast_Clear().
 + */
 +void r700Clear(GLcontext * ctx, GLbitfield mask)
 +{
 +    context_t *context = R700_CONTEXT(ctx);
++    __DRIdrawablePrivate *dPriv = radeon_get_drawable(&context->radeon);
 +    const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask);
 +    GLbitfield swrast_mask = 0, tri_mask = 0;
 +    int i;
 +    struct gl_framebuffer *fb = ctx->DrawBuffer;
 +
 +    /* XXX FIXME: clearing is switched off; everything below is unreachable. */
 +    return;
 +
 +    if( GL_TRUE == r700ClearFast(context, mask) )
 +    {
 +        return;
 +    }
 +
 +#if 0
 +      if (!context->radeon.radeonScreen->driScreen->dri2.enabled) {
 +              LOCK_HARDWARE(&context->radeon);
 +              UNLOCK_HARDWARE(&context->radeon);
 +              if (dPriv->numClipRects == 0)
 +                      return;
 +      }
 +#endif
 +
 +      R600_NEWPRIM(context);
 +
 +      /* Color buffers go to the triangle pass only with a full color mask. */
 +      if (colorMask == ~0)
 +        tri_mask |= (mask & BUFFER_BITS_COLOR);
 +
 +
 +      /* HW stencil */
 +      if (mask & BUFFER_BIT_STENCIL) {
 +              tri_mask |= BUFFER_BIT_STENCIL;
 +      }
 +
 +      /* HW depth */
 +      if (mask & BUFFER_BIT_DEPTH) {
 +              tri_mask |= BUFFER_BIT_DEPTH;
 +      }
 +
 +      /* If we're doing a tri pass for depth/stencil, include a likely color
 +       * buffer with it.
 +       *
 +       * The loop below removes from the triangle pass every selected buffer
 +       * whose renderbuffer has a zero ClassID.
 +       */
 +
 +      for (i = 0; i < BUFFER_COUNT; i++) {
 +        GLuint bufBit = 1 << i;
 +        if ((tri_mask) & bufBit) {
 +          if (!fb->Attachment[i].Renderbuffer->ClassID) {
 +            tri_mask &= ~bufBit;
 +            swrast_mask |= bufBit;
 +          }
 +        }
 +      }
 +
 +      /* SW fallback clearing: everything requested that is not in tri_mask
 +       * (this overwrites the bits accumulated in the loop above). */
 +      swrast_mask = mask & ~tri_mask;
 +
 +      if (tri_mask)
 +              r700UserClear(ctx, tri_mask);
 +      if (swrast_mask) {
 +              if (RADEON_DEBUG & DEBUG_FALLBACKS)
 +                      fprintf(stderr, "%s: swrast clear, mask: %x\n",
 +                              __FUNCTION__, swrast_mask);
 +              _swrast_Clear(ctx, swrast_mask);
 +      }
 +
 +}
 +
 +
index 404e946f7caf056905b3058999a1fd9776cfa1c2,0000000000000000000000000000000000000000..329a4aa179a35b23aa6b618361ca9687c3d0d218
mode 100644,000000..100644
--- /dev/null
@@@ -1,1228 -1,0 +1,1228 @@@
-       __DRIdrawablePrivate *dPriv = context->radeon.dri.drawable;
 +/*
 + * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice shall be included
 + * in all copies or substantial portions of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 + */
 +
 +/*
 + * Authors:
 + *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
 + */
 +
 +#include "main/glheader.h"
 +#include "main/mtypes.h"
 +#include "main/state.h"
 +#include "main/imports.h"
 +#include "main/enums.h"
 +#include "main/macros.h"
 +#include "main/dd.h"
 +#include "main/simple_list.h"
 +
 +#include "tnl/tnl.h"
 +#include "tnl/t_pipeline.h"
 +#include "tnl/t_vp_build.h"
 +#include "swrast/swrast.h"
 +#include "swrast_setup/swrast_setup.h"
 +#include "main/api_arrayelt.h"
 +#include "main/state.h"
 +#include "main/framebuffer.h"
 +
 +#include "shader/prog_parameter.h"
 +#include "shader/prog_statevars.h"
 +#include "vbo/vbo.h"
 +#include "main/texformat.h"
 +
 +#include "r600_context.h"
 +
 +#include "r700_state.h"
 +
 +#include "r700_fragprog.h"
 +#include "r700_vertprog.h"
 +
 +
 +/**
 + * Placeholder for default hardware state setup; the body is empty and
 + * \p context is not touched.
 + */
 +void r700SetDefaultStates(context_t *context) //--------------------
 +{
 +    
 +}
 +
 +/**
 + * Refresh the vertex program and the program state parameters.
 + *
 + * When context->radeon.NewGLState is non-zero it is reset to 0, the
 + * fixed-function program is rebuilt with _tnl_UpdateFixedFunctionProgram()
 + * and the vertex shader is re-selected with r700SelectVertexShader().
 + * r700UpdateStateParameters(ctx, _NEW_PROGRAM) is called on every path.
 + */
 +void r700UpdateShaders (GLcontext * ctx)  //----------------------------------
 +{
 +    context_t *context = R700_CONTEXT(ctx);
 +
 +    GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
 +    GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
 +
 +    struct r700_vertex_program *vp;
 +      int i;
 +
 +    if (context->radeon.NewGLState) 
 +    {
 +        context->radeon.NewGLState = 0;
 +
 +        /* Temporarily point the material attributes at stride-0 dummies;
 +         * the real pointers are saved in temp_attrib and restored below.
 +         * Only the stride field of each dummy is initialised. */
 +        for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) 
 +        {
 +            /* mat states from state var not array for sw */
 +            dummy_attrib[i].stride = 0;
 +
 +            temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i];
 +            TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &(dummy_attrib[i]);
 +        }
 +
 +        _tnl_UpdateFixedFunctionProgram(ctx);
 +
 +        /* Put the original material attribute pointers back. */
 +        for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) 
 +        {
 +            TNL_CONTEXT(ctx)->vb.AttribPtr[i] = temp_attrib[i];
 +        }
 +
 +        r700SelectVertexShader(ctx);
 +        vp = (struct r700_vertex_program *)ctx->VertexProgram._Current;
 +
 +        /* NOTE(review): vp is dereferenced without a NULL check, and this
 +         * branch currently does the same as the fall-through path below
 +         * (the software-TCL fallback is still a TODO). */
 +        if (vp->translated == GL_FALSE) 
 +        {
 +            // TODO
 +            //fprintf(stderr, "Failing back to sw-tcl\n");
 +            //hw_tcl_on = future_hw_tcl_on = 0;
 +            //r300ResetHwState(rmesa);
 +            //
 +            r700UpdateStateParameters(ctx, _NEW_PROGRAM);
 +            return;
 +        }
 +    }
 +
 +    r700UpdateStateParameters(ctx, _NEW_PROGRAM);
 +}
 +
 +/*
 + * Intended to correctly position primitives.
 + *
 + * Currently a no-op: the only statement, a call to radeonUpdateScissor(),
 + * is commented out.
 + */
 +void r700UpdateViewportOffset(GLcontext * ctx) //------------------
 +{
 +
 +      //radeonUpdateScissor(ctx);
 +
 +    return;
 +}
 +
 +/**
 + * Tell the card where to render (offset, pitch).
 + * Effected by glDrawBuffer, etc
 + */
 +void r700UpdateDrawBuffer(GLcontext * ctx) /* TODO */ //---------------------
 +{
 +#if 0 /* to be enabled */
 +    context_t *context = R700_CONTEXT(ctx);
 +
 +    switch (ctx->DrawBuffer->_ColorDrawBufferIndexes[0]) 
 +    {
 +      case BUFFER_FRONT_LEFT:
 +          context->target.rt = context->screen->frontBuffer;
 +          break;
 +      case BUFFER_BACK_LEFT:
 +          context->target.rt = context->screen->backBuffer;
 +          break;
 +      default:
 +          memset (&context->target.rt, sizeof(context->target.rt), 0);
 +      }
 +#endif /* to be enabled */
 +}
 +
 +/**
 + * Placeholder for fetching one state-variable parameter.
 + *
 + * Not implemented: \p state is not read and \p value is not written.
 + * The local context pointer is unused so far.
 + */
 +static void r700FetchStateParameter(GLcontext * ctx,
 +                                              const gl_state_index state[STATE_LENGTH],
 +                                              GLfloat * value)
 +{
 +      context_t *context = R700_CONTEXT(ctx);
 +
 +    /* TODO */
 +}
 +
 +void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state) //--------------------
 +{
 +      struct r700_fragment_program *fp;
 +      struct gl_program_parameter_list *paramList;
 +      GLuint i;
 +
 +      if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM)))
 +              return;
 +
 +      fp = (struct r700_fragment_program *)ctx->FragmentProgram._Current;
 +      if (!fp)
 +    {
 +              return;
 +    }
 +
 +      paramList = fp->mesa_program.Base.Parameters;
 +
 +      if (!paramList)
 +    {
 +              return;
 +    }
 +
 +      for (i = 0; i < paramList->NumParameters; i++) 
 +    {
 +              if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) 
 +        {
 +                      r700FetchStateParameter(ctx,
 +                                              paramList->Parameters[i].
 +                                              StateIndexes,
 +                                              paramList->ParameterValues[i]);
 +              }
 +      }
 +}
 +
 +/**
 + * Called by Mesa after an internal state update.
 + *
 + * Forwards \p new_state to the swrast, swsetup, vbo, tnl and array-element
 + * modules, refreshes the framebuffer and draw buffer when buffer, color or
 + * pixel state changed, updates the program state parameters, programs the
 + * vertex-format and gradient bits according to r700->bEnablePerspective,
 + * and finally ORs \p new_state into context->radeon.NewGLState.
 + */
 +static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //-------------------
 +{
 +    context_t *context = R700_CONTEXT(ctx);
 +
 +    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
 +
 +    _swrast_InvalidateState(ctx, new_state);
 +      _swsetup_InvalidateState(ctx, new_state);
 +      _vbo_InvalidateState(ctx, new_state);
 +      _tnl_InvalidateState(ctx, new_state);
 +      _ae_invalidate_state(ctx, new_state);
 +
 +      if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) 
 +    {
 +        _mesa_update_framebuffer(ctx);
 +              /* this updates the DrawBuffer's Width/Height if it's a FBO */
 +              _mesa_update_draw_buffer_bounds(ctx);
 +
 +              r700UpdateDrawBuffer(ctx);
 +      }
 +
 +      r700UpdateStateParameters(ctx, new_state);
 +
 +    if(GL_TRUE == r700->bEnablePerspective)
 +    {
 +        /* Do scale XY and Z by 1/W0 for perspective correction on pos. For orthogonal case, set both to one. */
 +        CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
 +        CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
 +
 +        SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
 +
 +        SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
 +        CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
 +    }
 +    else
 +    {
 +        /* For orthogonal case. */
 +        SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
 +        SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
 +
 +        /* VTX_W0_FMT is set in both branches. */
 +        SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
 +
 +        CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
 +        SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
 +    }
 +
 +      /* Remember the dirty state; r700UpdateShaders() checks and resets it. */
 +      context->radeon.NewGLState |= new_state;
 +}
 +
 +static void r700SetDepthState(GLcontext * ctx)
 +{
 +      context_t *context = R700_CONTEXT(ctx);
 +
 +    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
 +
 +    if (ctx->Depth.Test)
 +    {
 +        SETbit(r700->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit);
 +        if (ctx->Depth.Mask)
 +        {
 +            SETbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
 +        }
 +        else
 +        {
 +            CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
 +        }
 +
 +        switch (ctx->Depth.Func)
 +        {
 +        case GL_NEVER:            
 +            SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_NEVER, 
 +                     ZFUNC_shift, ZFUNC_mask);
 +            break;
 +        case GL_LESS:
 +            SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_LESS, 
 +                     ZFUNC_shift, ZFUNC_mask);            
 +            break;
 +        case GL_EQUAL:
 +            SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_EQUAL, 
 +                     ZFUNC_shift, ZFUNC_mask);
 +            break;
 +        case GL_LEQUAL:
 +            SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_LEQUAL,  
 +                     ZFUNC_shift, ZFUNC_mask);
 +            break;
 +        case GL_GREATER:
 +            SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_GREATER,  
 +                     ZFUNC_shift, ZFUNC_mask);           
 +            break;
 +        case GL_NOTEQUAL:
 +            SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_NOTEQUAL,  
 +                     ZFUNC_shift, ZFUNC_mask);
 +            break;
 +        case GL_GEQUAL:
 +            SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_GEQUAL,  
 +                     ZFUNC_shift, ZFUNC_mask);
 +            break;
 +        case GL_ALWAYS:
 +            SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS,  
 +                     ZFUNC_shift, ZFUNC_mask);
 +            break;
 +        default:
 +            SETfield(r700->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS,  
 +                     ZFUNC_shift, ZFUNC_mask);
 +            break;
 +        }
 +    }
 +    else
 +    {
 +        CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit);
 +        CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
 +    }
 +}
 +
 +/** Alpha-function hook; not implemented (empty body, arguments ignored). */
 +static void r700AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) //---------------
 +{
 +}
 +
 +
 +/** Blend-color hook; not implemented (empty body, arguments ignored). */
 +static void r700BlendColor(GLcontext * ctx, const GLfloat cf[4]) //----------------
 +{
 +}
 +
 +/** Blend-equation hook; not implemented (empty body, arguments ignored). */
 +static void r700BlendEquationSeparate(GLcontext * ctx,
 +                                                    GLenum modeRGB, GLenum modeA) //-----------------
 +{
 +}
 +
 +/** Blend-function hook; not implemented (empty body, arguments ignored). */
 +static void r700BlendFuncSeparate(GLcontext * ctx,
 +                                GLenum sfactorRGB, GLenum dfactorRGB,
 +                                GLenum sfactorA, GLenum dfactorA) //------------------------
 +{
 +}
 +
 +/**
 + * Mirror the GL polygon culling and winding state into PA_SU_SC_MODE_CNTL.
 + *
 + * The FACE, CULL_FRONT and CULL_BACK bits are cleared first.  When culling
 + * is enabled the cull bits are set according to ctx->Polygon.CullFaceMode.
 + * FACE is set for GL_CW front faces and left clear for GL_CCW or any other
 + * value.
 + */
 +static void r700UpdateCulling(GLcontext * ctx)
 +{
 +    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw);
 +
 +    /* Start from "no culling, default winding". */
 +    CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit);
 +    CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
 +    CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
 +
 +    if (ctx->Polygon.CullFlag) 
 +    {
 +        switch (ctx->Polygon.CullFaceMode) 
 +        {
 +        case GL_FRONT:            
 +            SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
 +            CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
 +            break;
 +        case GL_BACK:
 +            CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
 +            SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
 +            break;
 +        case GL_FRONT_AND_BACK:
 +            SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
 +            SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
 +            break;
 +        default:
 +            /* Unknown mode: cull nothing. */
 +            CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
 +            CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
 +            break;
 +        }
 +    }
 +
 +    switch (ctx->Polygon.FrontFace) 
 +    {
 +        case GL_CW:
 +            SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit);
 +            break;
 +        case GL_CCW:
 +            CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); 
 +            break;
 +        default:
 +            CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); /* default: ccw */
 +            break;
 +    }
 +}
 +
 +/**
 + * Handle a capability being enabled or disabled.
 + *
 + * Only three capabilities currently have an effect:
 + *  - GL_DEPTH_TEST   re-derives the depth state via r700SetDepthState(),
 + *  - GL_CULL_FACE    re-derives the culling state via r700UpdateCulling(),
 + *  - GL_SCISSOR_TEST flushes vertices, stores \p state in
 + *                    context->radeon.state.scissor.enabled and calls
 + *                    radeonUpdateScissor().
 + * Every other capability is ignored; the handlers for several of them are
 + * still commented out.
 + */
 +static void r700Enable(GLcontext * ctx, GLenum cap, GLboolean state) //------------------
 +{
 +      context_t *context = R700_CONTEXT(ctx);
 +
 +      switch (cap) {
 +      case GL_TEXTURE_1D:
 +      case GL_TEXTURE_2D:
 +      case GL_TEXTURE_3D:
 +              /* empty */
 +              break;
 +      case GL_FOG:
 +              /* empty */
 +              break;
 +      case GL_ALPHA_TEST:
 +              //r700SetAlphaState(ctx);
 +              break;
 +      case GL_COLOR_LOGIC_OP:
 +              //r700SetLogicOpState(ctx);
 +              /* fall-through, because logic op overrides blending */
 +      case GL_BLEND:
 +              //r700SetBlendState(ctx);
 +              break;
 +      case GL_CLIP_PLANE0:
 +      case GL_CLIP_PLANE1:
 +      case GL_CLIP_PLANE2:
 +      case GL_CLIP_PLANE3:
 +      case GL_CLIP_PLANE4:
 +      case GL_CLIP_PLANE5:
 +              //r700SetClipPlaneState(ctx, cap, state);
 +              break;
 +      case GL_DEPTH_TEST:
 +              r700SetDepthState(ctx);
 +              break;
 +      case GL_STENCIL_TEST:
 +              //r700SetStencilState(ctx, state);
 +              break;
 +      case GL_CULL_FACE:
 +              r700UpdateCulling(ctx);
 +              break;
 +      case GL_POLYGON_OFFSET_POINT:
 +      case GL_POLYGON_OFFSET_LINE:
 +      case GL_POLYGON_OFFSET_FILL:
 +              //r700SetPolygonOffsetState(ctx, state);
 +              break;
 +      case GL_SCISSOR_TEST:
 +              /* Flush first, then record the new flag and update the scissor. */
 +              radeon_firevertices(&context->radeon);
 +              context->radeon.state.scissor.enabled = state;
 +              radeonUpdateScissor(ctx);
 +              break;
 +      default:
 +              break;
 +      }
 +
 +}
 +
 +/**
 + * Handle glColorMask()
 + *
 + * Packs the four channel flags into a 4-bit value (r = 1, g = 2, b = 4,
 + * a = 8) and stores it in the OUTPUT0_ENABLE field of CB_SHADER_MASK.
 + */
 +static void r700ColorMask(GLcontext * ctx,
 +                        GLboolean r, GLboolean g, GLboolean b, GLboolean a) //------------------
 +{
 +      R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw);
 +      unsigned int mask = ((r ? 1 : 0) |
 +                           (g ? 2 : 0) |
 +                           (b ? 4 : 0) |
 +                           (a ? 8 : 0));
 +
 +      /* NOTE(review): the comparison is against the whole register value,
 +       * not just the OUTPUT0_ENABLE field -- confirm this is intended. */
 +      if (mask != r700->CB_SHADER_MASK.u32All)
 +              SETfield(r700->CB_SHADER_MASK.u32All, mask, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask);
 +}
 +
 +/**
 + * Change the depth testing function.
 + *
 + * \p func itself is not used: the whole depth state is re-derived from the
 + * GL context by r700SetDepthState().
 + *
 + * \note Mesa already filters redundant calls to this function.
 + */
 +static void r700DepthFunc(GLcontext * ctx, GLenum func) //--------------------
 +{
 +    r700SetDepthState(ctx);
 +}
 +
 +/**
 + * Enable/Disable depth writing.
 + *
 + * \p mask itself is not used: the whole depth state is re-derived from the
 + * GL context by r700SetDepthState().
 + *
 + * \note Mesa already filters redundant calls to this function.
 + */
 +static void r700DepthMask(GLcontext * ctx, GLboolean mask) //------------------
 +{
 +    r700SetDepthState(ctx);
 +}
 +
 +/**
 + * Change the culling mode.
 + *
 + * \p mode itself is not used: the culling state is re-derived from the GL
 + * context by r700UpdateCulling().
 + *
 + * \note Mesa already filters redundant calls to this function.
 + */
 +static void r700CullFace(GLcontext * ctx, GLenum mode) //-----------------
 +{
 +    r700UpdateCulling(ctx);
 +}
 +
 +/* =============================================================
 + * Fog
 + *
 + * Fog parameter hook; not implemented (empty body, arguments ignored).
 + */
 +static void r700Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) //--------------
 +{
 +}
 +
 +/**
 + * Change the polygon orientation.
 + *
 + * \p mode itself is not used: the winding and culling state is re-derived
 + * from the GL context by r700UpdateCulling().
 + *
 + * \note Mesa already filters redundant calls to this function.
 + */
 +static void r700FrontFace(GLcontext * ctx, GLenum mode) //------------------
 +{
 +    r700UpdateCulling(ctx);
 +}
 +
 +static void r700ShadeModel(GLcontext * ctx, GLenum mode) //--------------------
 +{
 +      context_t *context = R700_CONTEXT(ctx);
 +      R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
 +
 +      /* also need to set/clear FLAT_SHADE bit per param in SPI_PS_INPUT_CNTL_[0-31] */
 +      switch (mode) {
 +      case GL_FLAT:
 +              SETbit(r700->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit);
 +              break;
 +      case GL_SMOOTH:
 +              CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit);
 +              break;
 +      default:
 +              return;
 +      }
 +}
 +
 +/** Point-parameter hook; not implemented (empty body, arguments ignored). */
 +static void r700PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * param) //---------------
 +{
 +}
 +
 +/** Stencil-function hook; not implemented (empty body, arguments ignored). */
 +static void r700StencilFuncSeparate(GLcontext * ctx, GLenum face,
 +                                  GLenum func, GLint ref, GLuint mask) //---------------------
 +{
 +}
 +
 +
 +/** Stencil-mask hook; not implemented (empty body, arguments ignored). */
 +static void r700StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) //--------------
 +{
 +}
 +
 +/** Stencil-operation hook; not implemented (empty body, arguments ignored). */
 +static void r700StencilOpSeparate(GLcontext * ctx, GLenum face,
 +                                GLenum fail, GLenum zfail, GLenum zpass) //--------------------
 +{
 +}
 +
 +/**
 + * Recompute the viewport transform of viewport \p id.
 + *
 + * Takes the scale/translate terms from ctx->Viewport._WindowMap, adds the
 + * drawable's x offset, and for window-system framebuffers
 + * (ctx->DrawBuffer->Name == 0) flips Y and biases it by the drawable's
 + * y + h.  The Z terms are divided by the draw buffer's _DepthMaxF.  The
 + * results are stored in the PA_CL_VPORT_* registers of r700->viewport[id],
 + * which is then marked enabled.
 + */
 +static void r700UpdateWindow(GLcontext * ctx, int id) //--------------------
 +{
 +
 +      context_t *context = R700_CONTEXT(ctx);
 +      R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
++      __DRIdrawablePrivate *dPriv = radeon_get_drawable(&context->radeon);
 +      /* Without a drawable both offsets are 0. */
 +      GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
 +      GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
 +      const GLfloat *v = ctx->Viewport._WindowMap.m;
 +      const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
 +      const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
 +      GLfloat y_scale, y_bias;
 +
 +      if (render_to_fbo) {
 +              /* FBO: keep Y as it is. */
 +              y_scale = 1.0;
 +              y_bias = 0;
 +      } else {
 +              /* Window: invert Y and shift by the drawable's y + h. */
 +              y_scale = -1.0;
 +              y_bias = yoffset;
 +      }
 +
 +      GLfloat sx = v[MAT_SX];
 +      GLfloat tx = v[MAT_TX] + xoffset;
 +      GLfloat sy = v[MAT_SY] * y_scale;
 +      GLfloat ty = (v[MAT_TY] * y_scale) + y_bias;
 +      GLfloat sz = v[MAT_SZ] * depthScale;
 +      GLfloat tz = v[MAT_TZ] * depthScale;
 +
 +      /* TODO : Need DMA flush as well. */
 +
 +      r700->viewport[id].PA_CL_VPORT_XSCALE.f32All  = sx;
 +      r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx;
 +
 +      r700->viewport[id].PA_CL_VPORT_YSCALE.f32All  = sy;
 +      r700->viewport[id].PA_CL_VPORT_YOFFSET.f32All = ty;
 +
 +      r700->viewport[id].PA_CL_VPORT_ZSCALE.f32All  = sz;
 +      r700->viewport[id].PA_CL_VPORT_ZOFFSET.f32All = tz;
 +
 +      r700->viewport[id].enabled = GL_TRUE;
 +}
 +
 +
 +/**
 + * Viewport hook: recompute the transform of viewport 0 with
 + * r700UpdateWindow(), then pass the rectangle on to radeon_viewport().
 + */
 +static void r700Viewport(GLcontext * ctx,
 +                         GLint x,
 +                         GLint y,
 +                       GLsizei width,
 +                         GLsizei height) //--------------------
 +{
 +      r700UpdateWindow(ctx, 0);
 +
 +      radeon_viewport(ctx, x, y, width, height);
 +}
 +
 +/**
 + * Depth-range hook: \p nearval and \p farval are not read directly; the
 + * transform of viewport 0 is recomputed by r700UpdateWindow().
 + */
 +static void r700DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) //-------------
 +{
 +      r700UpdateWindow(ctx, 0);
 +}
 +
 +/** Point-size hook; not implemented (empty body, arguments ignored). */
 +static void r700PointSize(GLcontext * ctx, GLfloat size) //-------------------
 +{
 +}
 +
 +/** Line-width hook; not implemented (empty body, arguments ignored). */
 +static void r700LineWidth(GLcontext * ctx, GLfloat widthf) //---------------
 +{
 +}
 +
 +/** Polygon-offset hook; not implemented (empty body, arguments ignored). */
 +static void r700PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units) //--------------
 +{
 +}
 +
 +
 +/** Polygon-mode hook; not implemented (empty body, arguments ignored). */
 +static void r700PolygonMode(GLcontext * ctx, GLenum face, GLenum mode) //------------------
 +{
 +}
 + 
 +/** Render-mode hook; not implemented (empty body, arguments ignored). */
 +static void r700RenderMode(GLcontext * ctx, GLenum mode) //---------------------
 +{
 +}
 +
 +/** Clip-plane hook; not implemented (empty body, arguments ignored). */
 +static void r700ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) //-----------------
 +{
 +}
 +
 +/**
 + * Program every scissor/clip rectangle of the chip context from one box.
 + *
 + * The box is the GL scissor rectangle when scissoring is enabled, otherwise
 + * the full color renderbuffer.  Bottom-right coordinates are stored
 + * inclusive (x2 - 1 / y2 - 1, or width - 1 / height - 1).  The same box is
 + * written to the window scissor, clip rectangles 0-3, the generic scissor
 + * and the scissor of viewport 0; viewport 0 also gets ZMIN = 0 and
 + * ZMAX = 0x3F800000 (the bit pattern of 1.0f) and is marked enabled.
 + *
 + * Prints "no rrb" and returns without touching any register when there is
 + * no color renderbuffer or it has no buffer object.
 + */
 +void r700SetScissor(context_t *context) //---------------
 +{
 +      R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
 +      unsigned x1, y1, x2, y2;
 +      int id = 0;
 +      struct radeon_renderbuffer *rrb;
 +
 +      rrb = radeon_get_colorbuffer(&context->radeon);
 +      if (!rrb || !rrb->bo) {
 +              fprintf(stderr, "no rrb\n");
 +              return;
 +      }
 +      if (context->radeon.state.scissor.enabled) {
 +              x1 = context->radeon.state.scissor.rect.x1;
 +              y1 = context->radeon.state.scissor.rect.y1;
 +              x2 = context->radeon.state.scissor.rect.x2 - 1;
 +              y2 = context->radeon.state.scissor.rect.y2 - 1;
 +      } else {
 +              x1 = 0;
 +              y1 = 0;
 +              x2 = rrb->width - 1;
 +              y2 = rrb->height - 1;
 +      }
 +
 +      /* window */
 +      SETbit(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
 +      SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, x1,
 +               PA_SC_WINDOW_SCISSOR_TL__TL_X_shift, PA_SC_WINDOW_SCISSOR_TL__TL_X_mask);
 +      SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, y1,
 +               PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift, PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask);
 +
 +      SETfield(r700->PA_SC_WINDOW_SCISSOR_BR.u32All, x2,
 +               PA_SC_WINDOW_SCISSOR_BR__BR_X_shift, PA_SC_WINDOW_SCISSOR_BR__BR_X_mask);
 +      SETfield(r700->PA_SC_WINDOW_SCISSOR_BR.u32All, y2,
 +               PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift, PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask);
 +
 +
 +      /* clip rectangle 0; rectangles 1-3 are copies of it (see below) */
 +      SETfield(r700->PA_SC_CLIPRECT_0_TL.u32All, x1,
 +               PA_SC_CLIPRECT_0_TL__TL_X_shift, PA_SC_CLIPRECT_0_TL__TL_X_mask);
 +      SETfield(r700->PA_SC_CLIPRECT_0_TL.u32All, y1,
 +               PA_SC_CLIPRECT_0_TL__TL_Y_shift, PA_SC_CLIPRECT_0_TL__TL_Y_mask);
 +      SETfield(r700->PA_SC_CLIPRECT_0_BR.u32All, x2,
 +               PA_SC_CLIPRECT_0_BR__BR_X_shift, PA_SC_CLIPRECT_0_BR__BR_X_mask);
 +      SETfield(r700->PA_SC_CLIPRECT_0_BR.u32All, y2,
 +               PA_SC_CLIPRECT_0_BR__BR_Y_shift, PA_SC_CLIPRECT_0_BR__BR_Y_mask);
 +
 +      r700->PA_SC_CLIPRECT_1_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All;
 +      r700->PA_SC_CLIPRECT_1_BR.u32All = r700->PA_SC_CLIPRECT_0_BR.u32All;
 +      r700->PA_SC_CLIPRECT_2_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All;
 +      r700->PA_SC_CLIPRECT_2_BR.u32All = r700->PA_SC_CLIPRECT_0_BR.u32All;
 +      r700->PA_SC_CLIPRECT_3_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All;
 +      r700->PA_SC_CLIPRECT_3_BR.u32All = r700->PA_SC_CLIPRECT_0_BR.u32All;
 +
 +      /* more....2d clip */
 +      SETbit(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
 +      SETfield(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, x1,
 +               PA_SC_GENERIC_SCISSOR_TL__TL_X_shift, PA_SC_GENERIC_SCISSOR_TL__TL_X_mask);
 +      SETfield(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, y1,
 +               PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift, PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask);
 +      SETfield(r700->PA_SC_GENERIC_SCISSOR_BR.u32All, x2,
 +               PA_SC_GENERIC_SCISSOR_BR__BR_X_shift, PA_SC_GENERIC_SCISSOR_BR__BR_X_mask);
 +      SETfield(r700->PA_SC_GENERIC_SCISSOR_BR.u32All, y2,
 +               PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift, PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask);
 +
 +      /* scissor of viewport 0 (id is fixed to 0 above) */
 +      SETbit(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
 +      SETfield(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, x1,
 +               PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask);
 +      SETfield(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, y1,
 +               PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask);
 +      SETfield(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, x2,
 +               PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask);
 +      SETfield(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, y2,
 +               PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask);
 +
 +      /* depth range of the viewport: 0 .. 0x3F800000 (bit pattern of 1.0f) */
 +      r700->viewport[id].PA_SC_VPORT_ZMIN_0.u32All = 0;
 +      r700->viewport[id].PA_SC_VPORT_ZMAX_0.u32All = 0x3F800000;
 +      r700->viewport[id].enabled = GL_TRUE;
 +}
 +
 +/**
 + * Describe the current color renderbuffer as render target \p id.
 + *
 + * Enables the target in CB_TARGET_MASK, then fills the CB_COLOR0_SIZE and
 + * CB_COLOR0_INFO registers of r700->render_target[id] from the color
 + * renderbuffer's pitch and bytes per pixel (cpp) and from the screen's
 + * fbHeight: 8-8-8-8 format for cpp == 4, 5-6-5 otherwise.  The source
 + * blend fields of CB_BLEND0_CONTROL are cleared and the target is marked
 + * enabled.
 + *
 + * Prints "no rrb" and returns after the CB_TARGET_MASK update when there
 + * is no color renderbuffer or it has no buffer object.
 + */
 +void r700SetRenderTarget(context_t *context, int id)
 +{
 +    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
 +
 +    struct radeon_renderbuffer *rrb;
 +    unsigned int nPitchInPixel;
 +
 +    /* screen/window/view */
 +    /* NOTE(review): the shift depends on id but the mask is always
 +     * TARGET0_ENABLE_mask -- confirm this does the right thing for id != 0. */
 +    SETfield(r700->CB_TARGET_MASK.u32All, 0xF, (4 * id), TARGET0_ENABLE_mask);
 +
 +    rrb = radeon_get_colorbuffer(&context->radeon);
 +      if (!rrb || !rrb->bo) {
 +              fprintf(stderr, "no rrb\n");
 +              return;
 +      }
 +
 +    /* color buffer */
 +    /* NOTE(review): this value is overwritten with 0 a few lines below. */
 +    r700->render_target[id].CB_COLOR0_BASE.u32All = context->radeon.state.color.draw_offset;
 +
 +    /* pitch in pixels; the size fields take it in units of 8 and 64, minus 1 */
 +    nPitchInPixel = rrb->pitch/rrb->cpp;
 +    SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, (nPitchInPixel/8)-1,
 +             PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
 +    SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1,
 +             SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask);
 +    r700->render_target[id].CB_COLOR0_BASE.u32All = 0;
 +    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask);
 +    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ARRAY_LINEAR_GENERAL,
 +             CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
 +    /* 4 bytes per pixel selects 8-8-8-8, anything else 5-6-5 */
 +    if(4 == rrb->cpp)
 +    {
 +        SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, COLOR_8_8_8_8,
 +                 CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask);
 +        SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, SWAP_ALT, COMP_SWAP_shift, COMP_SWAP_mask);
 +    }
 +    else
 +    {
 +        SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, COLOR_5_6_5,
 +                 CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask);
 +        SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, SWAP_ALT_REV,
 +                 COMP_SWAP_shift, COMP_SWAP_mask);
 +    }
 +    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
 +    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, BLEND_CLAMP_bit);
 +    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
 +
 +    CLEARfield(r700->render_target[id].CB_BLEND0_CONTROL.u32All, COLOR_SRCBLEND_mask); /* no dst blend */
 +    CLEARfield(r700->render_target[id].CB_BLEND0_CONTROL.u32All, ALPHA_SRCBLEND_mask); /* no dst blend */
 +
 +    r700->render_target[id].enabled = GL_TRUE;
 +}
 +
 +/**
 + * Reset the depth-buffer registers and describe the current depth buffer.
 + *
 + * All DB_DEPTH_* and DB_RENDER_* registers touched here are first reset
 + * (DB_DEPTH_CLEAR to 0x3F800000, the bit pattern of 1.0f), the HiZ/HiS
 + * overrides are forced off and the alpha-to-mask offsets are set to 2.
 + * If a depth renderbuffer exists, DB_DEPTH_SIZE and DB_DEPTH_INFO are then
 + * filled from its pitch and bytes per pixel (cpp) and from the screen's
 + * fbHeight; without one the function returns after the reset.
 + *
 + * Note that DB_DEPTH_CONTROL is zeroed here as well, so any depth-test
 + * state written by r700SetDepthState() before this call is discarded.
 + *
 + * For a 4-byte depth buffer whose visual has neither 16 nor 24 depth bits
 + * an error is printed and _mesa_exit(-1) is called.
 + */
 +void r700SetDepthTarget(context_t *context)
 +{
 +    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
 +
 +    struct radeon_renderbuffer *rrb;
 +    unsigned int nPitchInPixel;
 +
 +    /* depth buf */
 +    r700->DB_DEPTH_SIZE.u32All = 0;
 +    r700->DB_DEPTH_BASE.u32All = 0;
 +    r700->DB_DEPTH_INFO.u32All = 0;
 +
 +    r700->DB_DEPTH_CONTROL.u32All   = 0;
 +    r700->DB_DEPTH_CLEAR.u32All     = 0x3F800000;
 +    r700->DB_DEPTH_VIEW.u32All      = 0;
 +    r700->DB_RENDER_CONTROL.u32All  = 0;
 +    r700->DB_RENDER_OVERRIDE.u32All = 0;
 +    /* only chip families ordered before RV770 get FORCE_SHADER_Z_ORDER */
 +    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
 +          SETbit(r700->DB_RENDER_OVERRIDE.u32All, FORCE_SHADER_Z_ORDER_bit);
 +    SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask);
 +    SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask);
 +    SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask);
 +
 +    r700->DB_ALPHA_TO_MASK.u32All = 0;
 +    SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET0_shift, ALPHA_TO_MASK_OFFSET0_mask);
 +    SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET1_shift, ALPHA_TO_MASK_OFFSET1_mask);
 +    SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET2_shift, ALPHA_TO_MASK_OFFSET2_mask);
 +    SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET3_shift, ALPHA_TO_MASK_OFFSET3_mask);
 +
 +    /* no depth buffer: leave the registers at the reset values above */
 +    rrb = radeon_get_depthbuffer(&context->radeon);
 +      if (!rrb)
 +              return;
 +
 +    nPitchInPixel = rrb->pitch/rrb->cpp;
 +
 +    SETfield(r700->DB_DEPTH_SIZE.u32All, (nPitchInPixel/8)-1,
 +             PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
 +    SETfield(r700->DB_DEPTH_SIZE.u32All, ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1,
 +             SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); /* size in pixel / 64 - 1 */
 +
 +    if(4 == rrb->cpp)
 +    {
 +        switch (GL_CONTEXT(context)->Visual.depthBits)
 +        {
 +        case 16:
 +        case 24:
 +            SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_8_24,
 +                     DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask);
 +            break;
 +        default:
 +            fprintf(stderr, "Error: Unsupported depth %d... exiting\n",
 +                GL_CONTEXT(context)->Visual.depthBits);
 +            _mesa_exit(-1);
 +        }
 +    }
 +    else
 +    {
 +        SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_16,
 +                     DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask);
 +    }
 +    SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_2D_TILED_THIN1,
 +             DB_DEPTH_INFO__ARRAY_MODE_shift, DB_DEPTH_INFO__ARRAY_MODE_mask);
 +    /* r700->DB_PREFETCH_LIMIT.bits.DEPTH_HEIGHT_TILE_MAX = (context->currentDraw->h >> 3) - 1; */ /* the z buffer size may be much bigger than what is needed, so use the actually used h. */
 +}
 +
 +static void r700InitSQConfig(GLcontext * ctx)
 +{
 +    context_t *context = R700_CONTEXT(ctx);
 +    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
 +    int ps_prio;
 +    int vs_prio;
 +    int gs_prio;
 +    int es_prio;
 +    int num_ps_gprs;
 +    int num_vs_gprs;
 +    int num_gs_gprs;
 +    int num_es_gprs;
 +    int num_temp_gprs;
 +    int num_ps_threads;
 +    int num_vs_threads;
 +    int num_gs_threads;
 +    int num_es_threads;
 +    int num_ps_stack_entries;
 +    int num_vs_stack_entries;
 +    int num_gs_stack_entries;
 +    int num_es_stack_entries;
 +
 +    // SQ
 +    ps_prio = 0;
 +    vs_prio = 1;
 +    gs_prio = 2;
 +    es_prio = 3;
 +    switch (context->radeon.radeonScreen->chip_family) {
 +    case CHIP_FAMILY_R600:
 +          num_ps_gprs = 192;
 +          num_vs_gprs = 56;
 +          num_temp_gprs = 4;
 +          num_gs_gprs = 0;
 +          num_es_gprs = 0;
 +          num_ps_threads = 136;
 +          num_vs_threads = 48;
 +          num_gs_threads = 4;
 +          num_es_threads = 4;
 +          num_ps_stack_entries = 128;
 +          num_vs_stack_entries = 128;
 +          num_gs_stack_entries = 0;
 +          num_es_stack_entries = 0;
 +          break;
 +    case CHIP_FAMILY_RV630:
 +    case CHIP_FAMILY_RV635:
 +          num_ps_gprs = 84;
 +          num_vs_gprs = 36;
 +          num_temp_gprs = 4;
 +          num_gs_gprs = 0;
 +          num_es_gprs = 0;
 +          num_ps_threads = 144;
 +          num_vs_threads = 40;
 +          num_gs_threads = 4;
 +          num_es_threads = 4;
 +          num_ps_stack_entries = 40;
 +          num_vs_stack_entries = 40;
 +          num_gs_stack_entries = 32;
 +          num_es_stack_entries = 16;
 +          break;
 +    case CHIP_FAMILY_RV610:
 +    case CHIP_FAMILY_RV620:
 +    case CHIP_FAMILY_RS780:
 +    default:
 +          num_ps_gprs = 84;
 +          num_vs_gprs = 36;
 +          num_temp_gprs = 4;
 +          num_gs_gprs = 0;
 +          num_es_gprs = 0;
 +          num_ps_threads = 136;
 +          num_vs_threads = 48;
 +          num_gs_threads = 4;
 +          num_es_threads = 4;
 +          num_ps_stack_entries = 40;
 +          num_vs_stack_entries = 40;
 +          num_gs_stack_entries = 32;
 +          num_es_stack_entries = 16;
 +          break;
 +    case CHIP_FAMILY_RV670:
 +          num_ps_gprs = 144;
 +          num_vs_gprs = 40;
 +          num_temp_gprs = 4;
 +          num_gs_gprs = 0;
 +          num_es_gprs = 0;
 +          num_ps_threads = 136;
 +          num_vs_threads = 48;
 +          num_gs_threads = 4;
 +          num_es_threads = 4;
 +          num_ps_stack_entries = 40;
 +          num_vs_stack_entries = 40;
 +          num_gs_stack_entries = 32;
 +          num_es_stack_entries = 16;
 +          break;
 +    case CHIP_FAMILY_RV770:
 +          num_ps_gprs = 192;
 +          num_vs_gprs = 56;
 +          num_temp_gprs = 4;
 +          num_gs_gprs = 0;
 +          num_es_gprs = 0;
 +          num_ps_threads = 188;
 +          num_vs_threads = 60;
 +          num_gs_threads = 0;
 +          num_es_threads = 0;
 +          num_ps_stack_entries = 256;
 +          num_vs_stack_entries = 256;
 +          num_gs_stack_entries = 0;
 +          num_es_stack_entries = 0;
 +          break;
 +    case CHIP_FAMILY_RV730:
 +    case CHIP_FAMILY_RV740:
 +          num_ps_gprs = 84;
 +          num_vs_gprs = 36;
 +          num_temp_gprs = 4;
 +          num_gs_gprs = 0;
 +          num_es_gprs = 0;
 +          num_ps_threads = 188;
 +          num_vs_threads = 60;
 +          num_gs_threads = 0;
 +          num_es_threads = 0;
 +          num_ps_stack_entries = 128;
 +          num_vs_stack_entries = 128;
 +          num_gs_stack_entries = 0;
 +          num_es_stack_entries = 0;
 +          break;
 +    case CHIP_FAMILY_RV710:
 +          num_ps_gprs = 192;
 +          num_vs_gprs = 56;
 +          num_temp_gprs = 4;
 +          num_gs_gprs = 0;
 +          num_es_gprs = 0;
 +          num_ps_threads = 144;
 +          num_vs_threads = 48;
 +          num_gs_threads = 0;
 +          num_es_threads = 0;
 +          num_ps_stack_entries = 128;
 +          num_vs_stack_entries = 128;
 +          num_gs_stack_entries = 0;
 +          num_es_stack_entries = 0;
 +          break;
 +    }
 +
 +    r700->sq_config.SQ_CONFIG.u32All = 0;
 +    if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) ||
 +        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) ||
 +      (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) ||
 +        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710))
 +          CLEARbit(r700->sq_config.SQ_CONFIG.u32All, VC_ENABLE_bit);
 +    else
 +          SETbit(r700->sq_config.SQ_CONFIG.u32All, VC_ENABLE_bit);
 +    SETbit(r700->sq_config.SQ_CONFIG.u32All, DX9_CONSTS_bit);
 +    SETbit(r700->sq_config.SQ_CONFIG.u32All, ALU_INST_PREFER_VECTOR_bit);
 +    SETfield(r700->sq_config.SQ_CONFIG.u32All, ps_prio, PS_PRIO_shift, PS_PRIO_mask);
 +    SETfield(r700->sq_config.SQ_CONFIG.u32All, ps_prio, VS_PRIO_shift, VS_PRIO_mask);
 +    SETfield(r700->sq_config.SQ_CONFIG.u32All, ps_prio, GS_PRIO_shift, GS_PRIO_mask);
 +    SETfield(r700->sq_config.SQ_CONFIG.u32All, ps_prio, ES_PRIO_shift, ES_PRIO_mask);
 +
 +    r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All = 0;
 +    SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, num_ps_gprs, NUM_PS_GPRS_shift, NUM_PS_GPRS_mask);
 +    SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, num_vs_gprs, NUM_VS_GPRS_shift, NUM_VS_GPRS_mask);
 +    SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, num_temp_gprs,
 +           NUM_CLAUSE_TEMP_GPRS_shift, NUM_CLAUSE_TEMP_GPRS_mask);
 +
 +    r700->sq_config.SQ_GPR_RESOURCE_MGMT_2.u32All = 0;
 +    SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_2.u32All, num_gs_gprs, NUM_GS_GPRS_shift, NUM_GS_GPRS_mask);
 +    SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_2.u32All, num_es_gprs, NUM_ES_GPRS_shift, NUM_ES_GPRS_mask);
 +
 +    r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All = 0;
 +    SETfield(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All, num_ps_threads,
 +           NUM_PS_THREADS_shift, NUM_PS_THREADS_mask);
 +    SETfield(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All, num_vs_threads,
 +           NUM_VS_THREADS_shift, NUM_VS_THREADS_mask);
 +    SETfield(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All, num_gs_threads,
 +           NUM_GS_THREADS_shift, NUM_GS_THREADS_mask);
 +    SETfield(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All, num_es_threads,
 +           NUM_ES_THREADS_shift, NUM_ES_THREADS_mask);
 +
 +    r700->sq_config.SQ_STACK_RESOURCE_MGMT_1.u32All = 0;
 +    SETfield(r700->sq_config.SQ_STACK_RESOURCE_MGMT_1.u32All, num_ps_stack_entries,
 +           NUM_PS_STACK_ENTRIES_shift, NUM_PS_STACK_ENTRIES_mask);
 +    SETfield(r700->sq_config.SQ_STACK_RESOURCE_MGMT_1.u32All, num_vs_stack_entries,
 +           NUM_VS_STACK_ENTRIES_shift, NUM_VS_STACK_ENTRIES_mask);
 +
 +    r700->sq_config.SQ_STACK_RESOURCE_MGMT_2.u32All = 0;
 +    SETfield(r700->sq_config.SQ_STACK_RESOURCE_MGMT_2.u32All, num_gs_stack_entries,
 +           NUM_GS_STACK_ENTRIES_shift, NUM_GS_STACK_ENTRIES_mask);
 +    SETfield(r700->sq_config.SQ_STACK_RESOURCE_MGMT_2.u32All, num_es_stack_entries,
 +           NUM_ES_STACK_ENTRIES_shift, NUM_ES_STACK_ENTRIES_mask);
 +
 +}
 +
 +/**
 + * Calculate initial hardware state and register state functions.
 + * Assumes that the command buffer and state atoms have been
 + * initialized already.
 + */
 +void r700InitState(GLcontext * ctx) //-------------------
 +{
 +    context_t *context = R700_CONTEXT(ctx);
 +
 +    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
 +
 +    r700->TA_CNTL_AUX.u32All = 0;
 +    SETfield(r700->TA_CNTL_AUX.u32All, 28, TD_FIFO_CREDIT_shift, TD_FIFO_CREDIT_mask);
 +    r700->VC_ENHANCE.u32All = 0;
 +    r700->DB_WATERMARKS.u32All = 0;
 +    SETfield(r700->DB_WATERMARKS.u32All, 4, DEPTH_FREE_shift, DEPTH_FREE_mask);
 +    SETfield(r700->DB_WATERMARKS.u32All, 16, DEPTH_FLUSH_shift, DEPTH_FLUSH_mask);
 +    SETfield(r700->DB_WATERMARKS.u32All, 0, FORCE_SUMMARIZE_shift, FORCE_SUMMARIZE_mask);
 +    SETfield(r700->DB_WATERMARKS.u32All, 4, DEPTH_PENDING_FREE_shift, DEPTH_PENDING_FREE_mask);
 +    r700->SQ_DYN_GPR_CNTL_PS_FLUSH_REQ.u32All = 0;
 +    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
 +          SETfield(r700->TA_CNTL_AUX.u32All, 3, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask);
 +          r700->DB_DEBUG.u32All = 0x82000000;
 +          SETfield(r700->DB_WATERMARKS.u32All, 16, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask);
 +    } else {
 +          SETfield(r700->TA_CNTL_AUX.u32All, 2, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask);
 +          SETfield(r700->DB_WATERMARKS.u32All, 4, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask);
 +          SETbit(r700->SQ_DYN_GPR_CNTL_PS_FLUSH_REQ.u32All, VS_PC_LIMIT_ENABLE_bit);
 +    }
 +
 +    /* Turn off vgt reuse */
 +    r700->VGT_REUSE_OFF.u32All = 0;
 +    SETbit(r700->VGT_REUSE_OFF.u32All, REUSE_OFF_bit);
 +
 +    /* Specify offsetting and clamp values for vertices */
 +    r700->VGT_MAX_VTX_INDX.u32All      = 0xFFFFFF;
 +    r700->VGT_MIN_VTX_INDX.u32All      = 0;
 +    r700->VGT_INDX_OFFSET.u32All    = 0;
 +
 +    /* Specify the number of instances */
 +    r700->VGT_DMA_NUM_INSTANCES.u32All = 1;
 +
 +    /* not alpha blend */
 +    CLEARfield(r700->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_FUNC_mask);
 +    CLEARbit(r700->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit);
 +
 +    /* default shader connections. */
 +    r700->SPI_VS_OUT_ID_0.u32All  = 0x03020100;
 +    r700->SPI_VS_OUT_ID_1.u32All  = 0x07060504;
 +
 +    r700->SPI_PS_INPUT_CNTL[0].u32All  = 0x00000800;
 +    r700->SPI_PS_INPUT_CNTL[1].u32All  = 0x00000801;
 +    r700->SPI_PS_INPUT_CNTL[2].u32All  = 0x00000802;
 +
 +    r700->SPI_THREAD_GROUPING.u32All = 0;
 +    if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
 +          SETfield(r700->SPI_THREAD_GROUPING.u32All, 1, PS_GROUPING_shift, PS_GROUPING_mask);
 +
 +    SETfield(r700->CB_COLOR_CONTROL.u32All, 0xCC, ROP3_shift, ROP3_mask);
 +    CLEARbit(r700->CB_COLOR_CONTROL.u32All, PER_MRT_BLEND_bit);
 +
 +    r700->DB_SHADER_CONTROL.u32All = 0;
 +    SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit);
 +
 +    /* Set up the culling control register */
 +    SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES,
 +             POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask);
 +    SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES,
 +             POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask);
 +
 +    /* screen */
 +    r700->PA_SC_SCREEN_SCISSOR_TL.u32All = 0x0;
 +
 +    SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All,
 +           ((RADEONDRIPtr)(context->radeon.radeonScreen->driScreen->pDevPriv))->width,
 +           PA_SC_SCREEN_SCISSOR_BR__BR_X_shift, PA_SC_SCREEN_SCISSOR_BR__BR_X_mask);
 +    SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All,
 +           ((RADEONDRIPtr)(context->radeon.radeonScreen->driScreen->pDevPriv))->height,
 +           PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift, PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask);
 +
 +    /* 4 clip rectangles */ /* TODO : set these clip rects according to context->currentDraw->numClipRects */
 +    r700->PA_SC_CLIPRECT_RULE.u32All = 0;
 +    SETfield(r700->PA_SC_CLIPRECT_RULE.u32All, CLIP_RULE_mask, CLIP_RULE_shift, CLIP_RULE_mask);
 +
 +    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
 +          r700->PA_SC_EDGERULE.u32All = 0;
 +    else
 +          r700->PA_SC_EDGERULE.u32All = 0xAAAAAAAA;
 +
 +    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
 +          r700->PA_SC_MODE_CNTL.u32All = 0;
 +          SETbit(r700->PA_SC_MODE_CNTL.u32All, WALK_ORDER_ENABLE_bit);
 +          SETbit(r700->PA_SC_MODE_CNTL.u32All, FORCE_EOV_CNTDWN_ENABLE_bit);
 +    } else {
 +          r700->PA_SC_MODE_CNTL.u32All = 0x00500000;
 +          SETbit(r700->PA_SC_MODE_CNTL.u32All, FORCE_EOV_REZ_ENABLE_bit);
 +          SETbit(r700->PA_SC_MODE_CNTL.u32All, FORCE_EOV_CNTDWN_ENABLE_bit);
 +    }
 +
 +    /* Do scale XY and Z by 1/W0. */
 +    r700->bEnablePerspective = GL_TRUE;
 +    CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
 +    CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
 +    SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
 +
 +    /* Enable viewport scaling for all three axis */
 +    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_X_SCALE_ENA_bit);
 +    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_X_OFFSET_ENA_bit);
 +    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Y_SCALE_ENA_bit);
 +    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Y_OFFSET_ENA_bit);
 +    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Z_SCALE_ENA_bit);
 +    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Z_OFFSET_ENA_bit);
 +
 +    /* Set up point sizes and min/max values */
 +    SETfield(r700->PA_SU_POINT_SIZE.u32All, 0x8,
 +             PA_SU_POINT_SIZE__HEIGHT_shift, PA_SU_POINT_SIZE__HEIGHT_mask);
 +    SETfield(r700->PA_SU_POINT_SIZE.u32All, 0x8,
 +             PA_SU_POINT_SIZE__WIDTH_shift, PA_SU_POINT_SIZE__WIDTH_mask);
 +    CLEARfield(r700->PA_SU_POINT_MINMAX.u32All, MIN_SIZE_mask);
 +    SETfield(r700->PA_SU_POINT_MINMAX.u32All, 0x8000, MAX_SIZE_shift, MAX_SIZE_mask);
 +
 +    /* Set up line control */
 +    SETfield(r700->PA_SU_LINE_CNTL.u32All, 0x8,
 +             PA_SU_LINE_CNTL__WIDTH_shift, PA_SU_LINE_CNTL__WIDTH_mask);
 +    SETfield(r700->PA_SU_LINE_CNTL.u32All, 0x2,
 +             PA_SU_VTX_CNTL__ROUND_MODE_shift, PA_SU_VTX_CNTL__ROUND_MODE_mask);
 +    SETfield(r700->PA_SU_LINE_CNTL.u32All, 0x5,
 +             QUANT_MODE_shift, QUANT_MODE_mask);
 +
 +    r700->PA_SC_LINE_CNTL.u32All = 0;
 +    CLEARbit(r700->PA_SC_LINE_CNTL.u32All, EXPAND_LINE_WIDTH_bit);
 +    SETbit(r700->PA_SC_LINE_CNTL.u32All, LAST_PIXEL_bit);
 +
 +    /* Set up vertex control */
 +    r700->PA_SU_VTX_CNTL.u32All = 0;
 +    CLEARfield(r700->PA_SU_VTX_CNTL.u32All, QUANT_MODE_mask);
 +    SETbit(r700->PA_SU_VTX_CNTL.u32All, PIX_CENTER_bit);
 +    SETfield(r700->PA_SU_VTX_CNTL.u32All, X_ROUND_TO_EVEN,
 +             PA_SU_VTX_CNTL__ROUND_MODE_shift, PA_SU_VTX_CNTL__ROUND_MODE_mask);
 +
 +    /* to 1.0 = no guard band */
 +    r700->PA_CL_GB_VERT_CLIP_ADJ.u32All  = 0x3F800000;  /* 1.0 */
 +    r700->PA_CL_GB_VERT_DISC_ADJ.u32All  = 0x3F800000;
 +    r700->PA_CL_GB_HORZ_CLIP_ADJ.u32All  = 0x3F800000;
 +    r700->PA_CL_GB_HORZ_DISC_ADJ.u32All  = 0x3F800000;
 +
 +    /* CB */
 +    r700->CB_CLEAR_RED_R6XX.f32All = 1.0; //r6xx only
 +    r700->CB_CLEAR_GREEN_R6XX.f32All = 0.0; //r6xx only
 +    r700->CB_CLEAR_BLUE_R6XX.f32All = 1.0; //r6xx only
 +    r700->CB_CLEAR_ALPHA_R6XX.f32All = 1.0; //r6xx only
 +    r700->CB_FOG_RED_R6XX.u32All = 0; //r6xx only
 +    r700->CB_FOG_GREEN_R6XX.u32All = 0; //r6xx only
 +    r700->CB_FOG_BLUE_R6XX.u32All = 0; //r6xx only
 +
 +    r700->CB_BLEND_RED.u32All = 0;
 +    r700->CB_BLEND_GREEN.u32All = 0;
 +    r700->CB_BLEND_BLUE.u32All = 0;
 +    r700->CB_BLEND_ALPHA.u32All = 0;
 +
 +    r700->CB_BLEND_CONTROL.u32All = 0;
 +
 +    /* Disable color compares */
 +    SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_DRAW_ALWAYS,
 +             CLRCMP_FCN_SRC_shift, CLRCMP_FCN_SRC_mask);
 +    SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_DRAW_ALWAYS,
 +             CLRCMP_FCN_DST_shift, CLRCMP_FCN_DST_mask);
 +    SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_SEL_SRC,
 +             CLRCMP_FCN_SEL_shift, CLRCMP_FCN_SEL_mask);
 +
 +    /* Zero out source */
 +    r700->CB_CLRCMP_SRC.u32All = 0x00000000;
 +
 +    /* Put a compare color in for error checking */
 +    r700->CB_CLRCMP_DST.u32All = 0x000000FF;
 +
 +    /* Set up color compare mask */
 +    r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF;
 +
 +    /* default color mask */
 +    SETfield(r700->CB_SHADER_MASK.u32All, 0xF, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask);
 +
 +    /* Enable all samples for multi-sample anti-aliasing */
 +    r700->PA_SC_AA_MASK.u32All = 0xFFFFFFFF;
 +    /* Turn off AA */
 +    r700->PA_SC_AA_CONFIG.u32All = 0;
 +
 +    r700->SX_MISC.u32All = 0;
 +
 +    r700InitSQConfig(ctx);
 +}
 +
 +void r700InitStateFuncs(struct dd_function_table *functions) //-----------------
 +{
 +      functions->UpdateState = r700InvalidateState;
 +      functions->AlphaFunc = r700AlphaFunc;
 +      functions->BlendColor = r700BlendColor;
 +      functions->BlendEquationSeparate = r700BlendEquationSeparate;
 +      functions->BlendFuncSeparate = r700BlendFuncSeparate;
 +      functions->Enable = r700Enable;
 +      functions->ColorMask = r700ColorMask;
 +      functions->DepthFunc = r700DepthFunc;
 +      functions->DepthMask = r700DepthMask;
 +      functions->CullFace = r700CullFace;
 +      functions->Fogfv = r700Fogfv;
 +      functions->FrontFace = r700FrontFace;
 +      functions->ShadeModel = r700ShadeModel;
 +
 +      /* ARB_point_parameters */
 +      functions->PointParameterfv = r700PointParameter;
 +
 +      /* Stencil related */
 +      functions->StencilFuncSeparate = r700StencilFuncSeparate;
 +      functions->StencilMaskSeparate = r700StencilMaskSeparate;
 +      functions->StencilOpSeparate = r700StencilOpSeparate;
 +
 +      /* Viewport related */
 +      functions->Viewport = r700Viewport;
 +      functions->DepthRange = r700DepthRange;
 +      functions->PointSize = r700PointSize;
 +      functions->LineWidth = r700LineWidth;
 +
 +      functions->PolygonOffset = r700PolygonOffset;
 +      functions->PolygonMode = r700PolygonMode;
 +
 +      functions->RenderMode = r700RenderMode;
 +
 +      functions->ClipPlane = r700ClipPlane;
 +
 +      functions->Scissor = radeonScissor;
 +
 +      functions->DrawBuffer           = radeonDrawBuffer;
 +      functions->ReadBuffer           = radeonReadBuffer;
 +
 +}
 +
index 1e900865f02d52313e7eca1beb1809da86e696a0,eb0e5b35e5ecc6decc6b9f01c4034efecd8eb730..4bf006c7a55e7ab8f365d1376d58c44408250a90
@@@ -72,23 -105,25 +107,29 @@@ static const GLubyte *radeonGetString(G
                unsigned offset;
                GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
                        radeon->radeonScreen->AGPMode;
-               const char* chipname;
+               const char* chipclass;
+               char hardwarename[32];
  
 -              if (IS_R300_CLASS(radeon->radeonScreen))
 +              if (IS_R600_CLASS(radeon->radeonScreen))
-                       chipname = "R600";
++                      chipclass = "R600";
 +              else if (IS_R300_CLASS(radeon->radeonScreen))
-                       chipname = "R300";
+                       chipclass = "R300";
                else if (IS_R200_CLASS(radeon->radeonScreen))
-                       chipname = "R200";
+                       chipclass = "R200";
                else
-                       chipname = "R100";
+                       chipclass = "R100";
  
-               offset = driGetRendererString(buffer, chipname, DRIVER_DATE,
+               sprintf(hardwarename, "%s (%s %04X)",
+                       chipclass,
+                       get_chip_family_name(radeon->radeonScreen->chip_family),
+                       radeon->radeonScreen->device_id);
+               offset = driGetRendererString(buffer, hardwarename, DRIVER_DATE,
                                              agp_mode);
  
 -              if (IS_R300_CLASS(radeon->radeonScreen)) {
 +              if (IS_R600_CLASS(radeon->radeonScreen)) {
 +                      sprintf(&buffer[offset], " TCL");
 +              } else if (IS_R300_CLASS(radeon->radeonScreen)) {
                        sprintf(&buffer[offset], " %sTCL",
                                (radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)
                                ? "" : "NO-");
@@@ -225,17 -255,9 +266,17 @@@ void radeonDestroyContext(__DRIcontextP
                radeon_firevertices(radeon);
                _mesa_make_current(NULL, NULL, NULL);
        }
-       
        assert(radeon);
 -      if (radeon) {
 +      if (radeon) 
 +    {
 +
 +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */
 +          if (IS_R600_CLASS(screen))
 +        {
 +              r600DestroyContext(driContextPriv);
 +        }
 +#endif
  
                if (radeon->dma.current) {
                        rcommonFlushCmdBuf( radeon, __FUNCTION__ );
                 */
                /* free the option cache */
                driDestroyOptionCache(&radeon->optionCache);
-               
                rcommonDestroyCmdBuf(radeon);
  
 +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */
 +          if (!IS_R600_CLASS(screen))
 +#endif
                radeon_destroy_atom_list(radeon);
  
                if (radeon->state.scissor.pClipRects) {
index 0a33fe4afa402a6a8821dde93b98af0b4b7f9722,55aa4502dae01920c84635959437734e5c3984cf..f04a07fecd22d46dfb3dfff8d967ec0643f6c19e
@@@ -181,15 -207,11 +207,18 @@@ radeon_mipmap_tree* radeon_miptree_crea
        mt->tilebits = tilebits;
        mt->compressed = compressed;
  
-       calculate_miptree_layout(mt);
+       if (rmesa->radeonScreen->chip_family >= CHIP_FAMILY_R300)
+               calculate_miptree_layout_r300(rmesa, mt);
+       else
+               calculate_miptree_layout_r100(rmesa, mt);
  
 +#ifdef RADEON_DEBUG_BO
 +    mt->bo = radeon_bo_open(rmesa->radeonScreen->bom,
 +                            0, mt->totalsize, 1024,
 +                            RADEON_GEM_DOMAIN_VRAM,
 +                            0,
 +                            "MIPMAP TREE");
 +#else
        mt->bo = radeon_bo_open(rmesa->radeonScreen->bom,
                              0, mt->totalsize, 1024,
                              RADEON_GEM_DOMAIN_VRAM,
index a0e1b131781ebf27a1299b0dee5321bc62f7455f,12ae4ada5db4898a808f1446aa4c74cba4809271..e23d53c7a1adfe7fb25ee7eb8e80cd7089f6b712
@@@ -402,21 -398,9 +402,22 @@@ static const __DRItexBufferExtension r3
  };
  #endif
  
 +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
 +static const __DRItexOffsetExtension r600texOffsetExtension = { /* R6xx/R7xx texture-offset extension record */
 +    { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, /* extension name and version */
 +   r700SetTexOffset, /* +r6/r7: hook supplied by the r600 driver */
 +};
 +
 +static const __DRItexBufferExtension r600TexBufferExtension = { /* R6xx/R7xx texture-buffer extension record */
 +    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, /* extension name and version */
 +   r700SetTexBuffer,  /* +r6/r7: first hook supplied by the r600 driver */
 +   r700SetTexBuffer2, /* +r6/r7: second hook supplied by the r600 driver */
 +};
 +#endif
 +
  static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
  {
+    screen->device_id = device_id;
     screen->chip_flags = 0;
     switch ( device_id ) {
     case PCI_CHIP_RADEON_LY:
@@@ -1008,71 -817,64 +992,68 @@@ radeonCreateScreen( __DRIscreenPrivate 
        screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
     }
  
-    if (!screen->kernel_mm) {
-      screen->mmio.handle = dri_priv->registerHandle;
-      screen->mmio.size   = dri_priv->registerSize;
-      if ( drmMap( sPriv->fd,
-                 screen->mmio.handle,
-                 screen->mmio.size,
-                 &screen->mmio.map ) ) {
-        FREE( screen );
-        __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
-        return NULL;
-      }
 +   ret = radeon_set_screen_flags(screen, dri_priv->deviceID);
 +   if (ret == -1)
 +     return NULL;
 +
 -   screen->scratch = (__volatile__ uint32_t *)
 -     ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
+    screen->mmio.handle = dri_priv->registerHandle;
+    screen->mmio.size   = dri_priv->registerSize;
+    if ( drmMap( sPriv->fd,
+               screen->mmio.handle,
+               screen->mmio.size,
+               &screen->mmio.map ) ) {
+      FREE( screen );
+      __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
+      return NULL;
+    }
+    RADEONMMIO = screen->mmio.map;
+    screen->status.handle = dri_priv->statusHandle;
+    screen->status.size   = dri_priv->statusSize;
+    if ( drmMap( sPriv->fd,
+               screen->status.handle,
+               screen->status.size,
+               &screen->status.map ) ) {
+      drmUnmap( screen->mmio.map, screen->mmio.size );
+      FREE( screen );
+      __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
+      return NULL;
+    }
++   if (screen->chip_family < CHIP_FAMILY_R600)
++         screen->scratch = (__volatile__ uint32_t *)
++                 ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
++   else
++         screen->scratch = (__volatile__ uint32_t *)
++                 ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
  
-      RADEONMMIO = screen->mmio.map;
+    screen->buffers = drmMapBufs( sPriv->fd );
+    if ( !screen->buffers ) {
+      drmUnmap( screen->status.map, screen->status.size );
+      drmUnmap( screen->mmio.map, screen->mmio.size );
+      FREE( screen );
+      __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
+      return NULL;
+    }
  
-      screen->status.handle = dri_priv->statusHandle;
-      screen->status.size   = dri_priv->statusSize;
+    if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
+      screen->gartTextures.handle = dri_priv->gartTexHandle;
+      screen->gartTextures.size   = dri_priv->gartTexMapSize;
       if ( drmMap( sPriv->fd,
-                 screen->status.handle,
-                 screen->status.size,
-                 &screen->status.map ) ) {
-        drmUnmap( screen->mmio.map, screen->mmio.size );
-        FREE( screen );
-        __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
-        return NULL;
-      }
-      if (screen->chip_family < CHIP_FAMILY_R600)
-            screen->scratch = (__volatile__ uint32_t *)
-                    ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
-      else
-            screen->scratch = (__volatile__ uint32_t *)
-                    ((GLubyte *)screen->status.map + R600_SCRATCH_REG_OFFSET);
-      screen->buffers = drmMapBufs( sPriv->fd );
-      if ( !screen->buffers ) {
+                 screen->gartTextures.handle,
+                 screen->gartTextures.size,
+                 (drmAddressPtr)&screen->gartTextures.map ) ) {
+        drmUnmapBufs( screen->buffers );
         drmUnmap( screen->status.map, screen->status.size );
         drmUnmap( screen->mmio.map, screen->mmio.size );
         FREE( screen );
-        __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
+        __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
         return NULL;
-      }
-      
-      if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
-        screen->gartTextures.handle = dri_priv->gartTexHandle;
-        screen->gartTextures.size   = dri_priv->gartTexMapSize;
-        if ( drmMap( sPriv->fd,
-                   screen->gartTextures.handle,
-                   screen->gartTextures.size,
-                   (drmAddressPtr)&screen->gartTextures.map ) ) {
-        drmUnmapBufs( screen->buffers );
-        drmUnmap( screen->status.map, screen->status.size );
-        drmUnmap( screen->mmio.map, screen->mmio.size );
-        FREE( screen );
-        __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
-        return NULL;
-        }
-        
-        screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
-      }
-    }
+     }
  
+      screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
+    }
  
 -   ret = radeon_set_screen_flags(screen, dri_priv->deviceID);
 -   if (ret == -1)
 -     return NULL;
 -
     if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) &&
         sPriv->ddx_version.minor < 2) {
        fprintf(stderr, "xf86-video-ati-6.6.2 or newer needed for Radeon 9500/9700/9800 cards.\n");
  #endif
  
  #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
-         screen->extensions[i++] = &r300texOffsetExtension.base;
+    screen->extensions[i++] = &r300texOffsetExtension.base;
  #endif
  
-         screen->extensions[i++] = &r600texOffsetExtension.base;
 +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
-    }
++   screen->extensions[i++] = &r600texOffsetExtension.base;
 +#endif
 +
     screen->extensions[i++] = NULL;
     sPriv->extensions = screen->extensions;
  
@@@ -1346,8 -1122,30 +1341,20 @@@ radeonCreateScreen2(__DRIscreenPrivate 
         } else {
           screen->num_gb_pipes = temp;
         }
+        /* pipe overrides */
+        switch (device_id) {
+        case PCI_CHIP_R300_AD: /* 9500 with 1 quadpipe verified by: Reid Linnemann <lreid@cs.okstate.edu> */
+        case PCI_CHIP_RV410_5E4C: /* RV410 SE only have 1 quadpipe */
+        case PCI_CHIP_RV410_5E4F: /* RV410 SE only have 1 quadpipe */
+          screen->num_gb_pipes = 1;
+          break;
+        default:
+          break;
+        }
     }
  
 -   if (screen->chip_family <= CHIP_FAMILY_RS200)
 -      screen->chip_flags |= RADEON_CLASS_R100;
 -   else if (screen->chip_family <= CHIP_FAMILY_RV280)
 -      screen->chip_flags |= RADEON_CLASS_R200;
 -   else
 -      screen->chip_flags |= RADEON_CLASS_R300;
 -
 -   if (getenv("R300_NO_TCL"))
 -     screen->chip_flags &= ~RADEON_CHIPSET_TCL;
 -
     i = 0;
     screen->extensions[i++] = &driCopySubBufferExtension.base;
     screen->extensions[i++] = &driFrameTrackingExtension.base;