2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
33 #include "main/mtypes.h"
34 #include "main/imports.h"
36 #include "radeon_debug.h"
37 #include "r600_context.h"
39 #include "r700_assembler.h"
41 #define USE_CF_FOR_CONTINUE_BREAK 1
42 #define USE_CF_FOR_POP_AFTER 1
44 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
)
46 return pPVSDST
->addrmode0
| ((BITS
)pPVSDST
->addrmode1
<< 1);
49 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
)
51 pPVSDST
->addrmode0
= addrmode
& 1;
52 pPVSDST
->addrmode1
= (addrmode
>> 1) & 1;
55 void nomask_PVSDST(PVSDST
* pPVSDST
)
57 pPVSDST
->writex
= pPVSDST
->writey
= pPVSDST
->writez
= pPVSDST
->writew
= 1;
60 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
)
62 return pPVSSRC
->addrmode0
| ((BITS
)pPVSSRC
->addrmode1
<< 1);
65 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
)
67 pPVSSRC
->addrmode0
= addrmode
& 1;
68 pPVSSRC
->addrmode1
= (addrmode
>> 1) & 1;
/*
 * setswizzle_PVSSRC: store the selector `swz` into the source operand's
 * swizzle.  The only assignment visible here writes swizzlew.
 * NOTE(review): original source lines 74-76 are absent from this listing;
 * presumably they chain the same value into swizzlex/y/z -- confirm against
 * the full file before relying on this.
 */
72 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
)
77 pPVSSRC
->swizzlew
= swz
;
80 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
)
82 pPVSSRC
->swizzlex
= SQ_SEL_X
;
83 pPVSSRC
->swizzley
= SQ_SEL_Y
;
84 pPVSSRC
->swizzlez
= SQ_SEL_Z
;
85 pPVSSRC
->swizzlew
= SQ_SEL_W
;
89 swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
)
93 case SQ_SEL_X
: x
= pPVSSRC
->swizzlex
;
95 case SQ_SEL_Y
: x
= pPVSSRC
->swizzley
;
97 case SQ_SEL_Z
: x
= pPVSSRC
->swizzlez
;
99 case SQ_SEL_W
: x
= pPVSSRC
->swizzlew
;
106 case SQ_SEL_X
: y
= pPVSSRC
->swizzlex
;
108 case SQ_SEL_Y
: y
= pPVSSRC
->swizzley
;
110 case SQ_SEL_Z
: y
= pPVSSRC
->swizzlez
;
112 case SQ_SEL_W
: y
= pPVSSRC
->swizzlew
;
119 case SQ_SEL_X
: z
= pPVSSRC
->swizzlex
;
121 case SQ_SEL_Y
: z
= pPVSSRC
->swizzley
;
123 case SQ_SEL_Z
: z
= pPVSSRC
->swizzlez
;
125 case SQ_SEL_W
: z
= pPVSSRC
->swizzlew
;
132 case SQ_SEL_X
: w
= pPVSSRC
->swizzlex
;
134 case SQ_SEL_Y
: w
= pPVSSRC
->swizzley
;
136 case SQ_SEL_Z
: w
= pPVSSRC
->swizzlez
;
138 case SQ_SEL_W
: w
= pPVSSRC
->swizzlew
;
143 pPVSSRC
->swizzlex
= x
;
144 pPVSSRC
->swizzley
= y
;
145 pPVSSRC
->swizzlez
= z
;
146 pPVSSRC
->swizzlew
= w
;
/*
 * neg_PVSSRC: operates on the source operand pPVSSRC.
 * NOTE(review): the function body is not present in this listing; going by
 * the name it presumably sets the negate flags -- confirm in the full file.
 */
149 void neg_PVSSRC(PVSSRC
* pPVSSRC
)
/*
 * noneg_PVSSRC: operates on the source operand pPVSSRC.
 * NOTE(review): the function body is not present in this listing; going by
 * the name it presumably clears the negate flags -- confirm in the full file.
 */
157 void noneg_PVSSRC(PVSSRC
* pPVSSRC
)
165 // negate argument (for SUB instead of ADD and alike)
166 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
)
168 pPVSSRC
->negx
= !pPVSSRC
->negx
;
169 pPVSSRC
->negy
= !pPVSSRC
->negy
;
170 pPVSSRC
->negz
= !pPVSSRC
->negz
;
171 pPVSSRC
->negw
= !pPVSSRC
->negw
;
174 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
178 case 0: pPVSSRC
->swizzlex
= SQ_SEL_0
; pPVSSRC
->negx
= 0; break;
179 case 1: pPVSSRC
->swizzley
= SQ_SEL_0
; pPVSSRC
->negy
= 0; break;
180 case 2: pPVSSRC
->swizzlez
= SQ_SEL_0
; pPVSSRC
->negz
= 0; break;
181 case 3: pPVSSRC
->swizzlew
= SQ_SEL_0
; pPVSSRC
->negw
= 0; break;
186 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
190 case 0: pPVSSRC
->swizzlex
= SQ_SEL_1
; pPVSSRC
->negx
= 0; break;
191 case 1: pPVSSRC
->swizzley
= SQ_SEL_1
; pPVSSRC
->negy
= 0; break;
192 case 2: pPVSSRC
->swizzlez
= SQ_SEL_1
; pPVSSRC
->negz
= 0; break;
193 case 3: pPVSSRC
->swizzlew
= SQ_SEL_1
; pPVSSRC
->negw
= 0; break;
198 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
)
200 return (pOutVTXFmt0
->point_size
|
201 pOutVTXFmt0
->edge_flag
|
202 pOutVTXFmt0
->rta_index
|
203 pOutVTXFmt0
->kill_flag
|
204 pOutVTXFmt0
->viewport_index
);
/*
 * is_depth_component_exported: returns the bitwise OR of fragment output
 * format flags of *pFPOutFmt.  The terms visible here are depth,
 * stencil_ref and coverage_to_mask.
 * NOTE(review): original source line 211 (between stencil_ref and
 * coverage_to_mask) is absent from this listing, so one more OR-ed field is
 * probably missing -- confirm against the full file.
 */
207 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
)
209 return (pFPOutFmt
->depth
|
210 pFPOutFmt
->stencil_ref
|
212 pFPOutFmt
->coverage_to_mask
);
215 GLboolean
is_reduction_opcode(PVSDWORD
* dest
)
217 if (dest
->dst
.op3
== 0)
219 if ( (dest
->dst
.opcode
== SQ_OP2_INST_DOT4
|| dest
->dst
.opcode
== SQ_OP2_INST_DOT4_IEEE
|| dest
->dst
.opcode
== SQ_OP2_INST_CUBE
) )
/*
 * GetSurfaceFormat: choose a FMT_* surface format from a GL data type
 * (eType) and a channel count (nChannels).
 *
 * What the visible code shows:
 *  - `format` starts as FMT_INVALID and `uiElemSize` as 0.
 *  - GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT and GL_UNSIGNED_INT select the
 *    FMT_8*, FMT_16* and FMT_32* families, one format per channel count.
 *  - Two further groups select the FMT_32*_FLOAT family.
 *  - When pClient_size is non-NULL it receives uiElemSize * nChannels.
 *
 * NOTE(review): the switch headers, the remaining case labels, the
 * uiElemSize assignments and the return statement are absent from this
 * listing; presumably the function returns `format` -- confirm against the
 * full file.
 */
227 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
)
229 GLuint format
= FMT_INVALID
;
230 GLuint uiElemSize
= 0;
235 case GL_UNSIGNED_BYTE
:
240 format
= FMT_8
; break;
242 format
= FMT_8_8
; break;
244 format
= FMT_8_8_8
; break;
246 format
= FMT_8_8_8_8
; break;
252 case GL_UNSIGNED_SHORT
:
258 format
= FMT_16
; break;
260 format
= FMT_16_16
; break;
262 format
= FMT_16_16_16
; break;
264 format
= FMT_16_16_16_16
; break;
270 case GL_UNSIGNED_INT
:
276 format
= FMT_32
; break;
278 format
= FMT_32_32
; break;
280 format
= FMT_32_32_32
; break;
282 format
= FMT_32_32_32_32
; break;
293 format
= FMT_32_FLOAT
; break;
295 format
= FMT_32_32_FLOAT
; break;
297 format
= FMT_32_32_32_FLOAT
; break;
299 format
= FMT_32_32_32_32_FLOAT
; break;
309 format
= FMT_32_FLOAT
; break;
311 format
= FMT_32_32_FLOAT
; break;
313 format
= FMT_32_32_32_FLOAT
; break;
315 format
= FMT_32_32_32_32_FLOAT
; break;
322 //GL_ASSERT_NO_CASE();
325 if(NULL
!= pClient_size
)
327 *pClient_size
= uiElemSize
* nChannels
;
/*
 * r700GetNumOperands: report how many source operands the instruction
 * currently held in pAsm->D.dst takes, by switching on its opcode.
 *
 * The visible case labels form two groups: one starting at SQ_OP2_INST_ADD
 * (ADD, KILL*, MUL, MAX, MIN, SET*, PRED_SET*, DOT4, DOT4_IEEE, CUBE) and
 * one starting at SQ_OP2_INST_MOV (MOV, MOVA_FLOOR, FRACT, FLOOR, TRUNC,
 * EXP/LOG/RECIP variants, FLT_TO_INT, SIN, COS).  Unknown opcodes are
 * reported through radeon_error().
 *
 * NOTE(review): the return statements and the code before the switch are
 * absent from this listing; presumably the first group returns 2 and the
 * second returns 1 -- confirm against the full file.
 */
333 unsigned int r700GetNumOperands(r700_AssemblerBase
* pAsm
)
340 switch (pAsm
->D
.dst
.opcode
)
342 case SQ_OP2_INST_ADD
:
343 case SQ_OP2_INST_KILLE
:
344 case SQ_OP2_INST_KILLGT
:
345 case SQ_OP2_INST_KILLGE
:
346 case SQ_OP2_INST_KILLNE
:
347 case SQ_OP2_INST_MUL
:
348 case SQ_OP2_INST_MAX
:
349 case SQ_OP2_INST_MIN
:
350 //case SQ_OP2_INST_MAX_DX10:
351 //case SQ_OP2_INST_MIN_DX10:
352 case SQ_OP2_INST_SETE
:
353 case SQ_OP2_INST_SETNE
:
354 case SQ_OP2_INST_SETGT
:
355 case SQ_OP2_INST_SETGE
:
356 case SQ_OP2_INST_PRED_SETE
:
357 case SQ_OP2_INST_PRED_SETGT
:
358 case SQ_OP2_INST_PRED_SETGE
:
359 case SQ_OP2_INST_PRED_SETNE
:
360 case SQ_OP2_INST_DOT4
:
361 case SQ_OP2_INST_DOT4_IEEE
:
362 case SQ_OP2_INST_CUBE
:
365 case SQ_OP2_INST_MOV
:
366 case SQ_OP2_INST_MOVA_FLOOR
:
367 case SQ_OP2_INST_FRACT
:
368 case SQ_OP2_INST_FLOOR
:
369 case SQ_OP2_INST_TRUNC
:
370 case SQ_OP2_INST_EXP_IEEE
:
371 case SQ_OP2_INST_LOG_CLAMPED
:
372 case SQ_OP2_INST_LOG_IEEE
:
373 case SQ_OP2_INST_RECIP_IEEE
:
374 case SQ_OP2_INST_RECIPSQRT_IEEE
:
375 case SQ_OP2_INST_FLT_TO_INT
:
376 case SQ_OP2_INST_SIN
:
377 case SQ_OP2_INST_COS
:
380 default: radeon_error(
381 "Need instruction operand number for %x.\n", pAsm
->D
.dst
.opcode
);
/*
 * Init_r700_AssemblerBase: put the assembler state *pAsm into its initial
 * state for shader *pShader of pipe type `spt`.
 *
 * What the visible code does, in order:
 *  - initialises the shader object (Init_R700_Shader) and records it and
 *    the shader type in pAsm;
 *  - clears every "current clause" pointer and sets the clause type to
 *    CF_EMPTY_CLAUSE;
 *  - zeroes the export counters, alu_x_opcode, uLastPosUpdate and the
 *    fragment output format word (written through a BITS* cast);
 *  - resets register/constant bookkeeping (uUsedConsts starts at 256);
 *  - marks fc_stack[0] as FC_NONE and sets aArgSubst[3] to -1;
 *  - sets all export register numbers to -1 and the starting register
 *    numbers to 0;
 *  - clears the "input is used" flags and the cached vertex fetch pointers;
 *  - resets subroutine/caller bookkeeping and makes CALLSTACK[0] refer to
 *    the shader's main CF instruction list, which SetActiveCFlist() then
 *    selects.
 *
 * NOTE(review): the declaration of the loop counter `i`, several
 * statements (original lines 411-418, 437-440 among others) and the return
 * statement are absent from this listing -- confirm against the full file.
 */
387 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
)
391 Init_R700_Shader(pShader
);
392 pAsm
->pR700Shader
= pShader
;
393 pAsm
->currentShaderType
= spt
;
395 pAsm
->cf_last_export_ptr
= NULL
;
397 pAsm
->cf_current_export_clause_ptr
= NULL
;
398 pAsm
->cf_current_alu_clause_ptr
= NULL
;
399 pAsm
->cf_current_tex_clause_ptr
= NULL
;
400 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
401 pAsm
->cf_current_cf_clause_ptr
= NULL
;
403 // No clause has been created yet
404 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
406 pAsm
->number_of_colorandz_exports
= 0;
407 pAsm
->number_of_exports
= 0;
408 pAsm
->number_of_export_opcodes
= 0;
410 pAsm
->alu_x_opcode
= 0;
419 pAsm
->uLastPosUpdate
= 0;
421 *(BITS
*) &pAsm
->fp_stOutFmt0
= 0;
425 pAsm
->number_used_registers
= 0;
426 pAsm
->uUsedConsts
= 256;
430 pAsm
->uBoolConsts
= 0;
431 pAsm
->uIntConsts
= 0;
436 pAsm
->fc_stack
[0].type
= FC_NONE
;
441 pAsm
->aArgSubst
[3] = (-1);
445 for (i
=0; i
<NUMBER_OF_OUTPUT_COLORS
; i
++)
447 pAsm
->color_export_register_number
[i
] = (-1);
451 pAsm
->depth_export_register_number
= (-1);
452 pAsm
->stencil_export_register_number
= (-1);
453 pAsm
->coverage_to_mask_export_register_number
= (-1);
454 pAsm
->mask_export_register_number
= (-1);
456 pAsm
->starting_export_register_number
= 0;
457 pAsm
->starting_vfetch_register_number
= 0;
458 pAsm
->starting_temp_register_number
= 0;
459 pAsm
->uFirstHelpReg
= 0;
461 pAsm
->input_position_is_used
= GL_FALSE
;
462 pAsm
->input_normal_is_used
= GL_FALSE
;
464 for (i
=0; i
<NUMBER_OF_INPUT_COLORS
; i
++)
466 pAsm
->input_color_is_used
[ i
] = GL_FALSE
;
469 for (i
=0; i
<NUMBER_OF_TEXTURE_UNITS
; i
++)
471 pAsm
->input_texture_unit_is_used
[ i
] = GL_FALSE
;
474 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
476 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
479 pAsm
->number_of_inputs
= 0;
481 pAsm
->is_tex
= GL_FALSE
;
482 pAsm
->need_tex_barrier
= GL_FALSE
;
485 pAsm
->unSubArraySize
= 0;
486 pAsm
->unSubArrayPointer
= 0;
487 pAsm
->callers
= NULL
;
488 pAsm
->unCallerArraySize
= 0;
489 pAsm
->unCallerArrayPointer
= 0;
492 pAsm
->CALLSTACK
[0].FCSP_BeforeEntry
= 0;
493 pAsm
->CALLSTACK
[0].plstCFInstructions_local
494 = &(pAsm
->pR700Shader
->lstCFInstructions
);
496 pAsm
->CALLSTACK
[0].max
= 0;
497 pAsm
->CALLSTACK
[0].current
= 0;
499 SetActiveCFlist(pAsm
->pR700Shader
, pAsm
->CALLSTACK
[0].plstCFInstructions_local
);
506 GLboolean
IsTex(gl_inst_opcode Opcode
)
508 if( (OPCODE_TEX
==Opcode
) || (OPCODE_TXP
==Opcode
) || (OPCODE_TXB
==Opcode
) )
/*
 * IsAlu: classifies the IL opcode `Opcode` and returns a GLboolean.
 * NOTE(review): the function body is absent from this listing; going by the
 * name it presumably reports whether the opcode is an ALU instruction --
 * confirm against the full file.
 */
515 GLboolean
IsAlu(gl_inst_opcode Opcode
)
517 //TODO : more for fc and ex for higher spec.
525 int check_current_clause(r700_AssemblerBase
* pAsm
,
526 CF_CLAUSE_TYPE new_clause_type
)
528 if (pAsm
->cf_current_clause_type
!= new_clause_type
)
529 { //Close last open clause
530 switch (pAsm
->cf_current_clause_type
)
533 if ( pAsm
->cf_current_alu_clause_ptr
!= NULL
)
535 pAsm
->cf_current_alu_clause_ptr
= NULL
;
539 if ( pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
541 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
545 if ( pAsm
->cf_current_tex_clause_ptr
!= NULL
)
547 pAsm
->cf_current_tex_clause_ptr
= NULL
;
550 case CF_EXPORT_CLAUSE
:
551 if ( pAsm
->cf_current_export_clause_ptr
!= NULL
)
553 pAsm
->cf_current_export_clause_ptr
= NULL
;
556 case CF_OTHER_CLAUSE
:
557 if ( pAsm
->cf_current_cf_clause_ptr
!= NULL
)
559 pAsm
->cf_current_cf_clause_ptr
= NULL
;
562 case CF_EMPTY_CLAUSE
:
566 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
570 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
573 switch (new_clause_type
)
576 pAsm
->cf_current_clause_type
= CF_ALU_CLAUSE
;
579 pAsm
->cf_current_clause_type
= CF_VTX_CLAUSE
;
582 pAsm
->cf_current_clause_type
= CF_TEX_CLAUSE
;
584 case CF_EXPORT_CLAUSE
:
586 R700ControlFlowSXClause
* pR700ControlFlowSXClause
587 = (R700ControlFlowSXClause
*) CALLOC_STRUCT(R700ControlFlowSXClause
);
589 // Add new export instruction to control flow program
590 if (pR700ControlFlowSXClause
!= 0)
592 pAsm
->cf_current_export_clause_ptr
= pR700ControlFlowSXClause
;
593 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause
);
594 AddCFInstruction( pAsm
->pR700Shader
,
595 (R700ControlFlowInstruction
*)pR700ControlFlowSXClause
);
600 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
603 pAsm
->cf_current_clause_type
= CF_EXPORT_CLAUSE
;
606 case CF_EMPTY_CLAUSE
:
608 case CF_OTHER_CLAUSE
:
609 pAsm
->cf_current_clause_type
= CF_OTHER_CLAUSE
;
613 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
621 GLboolean
add_cf_instruction(r700_AssemblerBase
* pAsm
)
623 if(GL_FALSE
== check_current_clause(pAsm
, CF_OTHER_CLAUSE
))
628 pAsm
->cf_current_cf_clause_ptr
=
629 (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
631 if (pAsm
->cf_current_cf_clause_ptr
!= NULL
)
633 Init_R700ControlFlowGenericClause(pAsm
->cf_current_cf_clause_ptr
);
634 AddCFInstruction( pAsm
->pR700Shader
,
635 (R700ControlFlowInstruction
*)pAsm
->cf_current_cf_clause_ptr
);
639 radeon_error("Could not allocate a new VFetch CF instruction.\n");
646 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
647 R700VertexInstruction
* vertex_instruction_ptr
)
649 if( GL_FALSE
== check_current_clause(pAsm
, CF_VTX_CLAUSE
) )
654 if( pAsm
->cf_current_vtx_clause_ptr
== NULL
||
655 ( (pAsm
->cf_current_vtx_clause_ptr
!= NULL
) &&
656 (pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_vtx_clause_ptr
->m_ShaderInstType
)-1)
659 // Create new Vfetch control flow instruction for this new clause
660 pAsm
->cf_current_vtx_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
662 if (pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
664 Init_R700ControlFlowGenericClause(pAsm
->cf_current_vtx_clause_ptr
);
665 AddCFInstruction( pAsm
->pR700Shader
,
666 (R700ControlFlowInstruction
*)pAsm
->cf_current_vtx_clause_ptr
);
670 radeon_error("Could not allocate a new VFetch CF instruction.\n");
674 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
675 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
676 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
677 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
= 0x0;
678 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
679 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
680 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_VTX
;
681 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
682 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
684 LinkVertexInstruction(pAsm
->cf_current_vtx_clause_ptr
, vertex_instruction_ptr
);
688 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
++;
691 AddVTXInstruction(pAsm
->pR700Shader
, vertex_instruction_ptr
);
696 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
697 R700TextureInstruction
* tex_instruction_ptr
)
699 if ( GL_FALSE
== check_current_clause(pAsm
, CF_TEX_CLAUSE
) )
704 if ( pAsm
->cf_current_tex_clause_ptr
== NULL
||
705 ( (pAsm
->cf_current_tex_clause_ptr
!= NULL
) &&
706 (pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_tex_clause_ptr
->m_ShaderInstType
)-1)
709 // new tex cf instruction for this new clause
710 pAsm
->cf_current_tex_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
712 if (pAsm
->cf_current_tex_clause_ptr
!= NULL
)
714 Init_R700ControlFlowGenericClause(pAsm
->cf_current_tex_clause_ptr
);
715 AddCFInstruction( pAsm
->pR700Shader
,
716 (R700ControlFlowInstruction
*)pAsm
->cf_current_tex_clause_ptr
);
720 radeon_error("Could not allocate a new TEX CF instruction.\n");
724 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
725 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
726 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
728 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
729 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
730 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_TEX
;
731 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
732 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x0; //0x1;
736 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
++;
739 // If this clause constains any TEX instruction that is dependent on a previous instruction,
740 // set the barrier bit
741 if( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) || pAsm
->need_tex_barrier
== GL_TRUE
)
743 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
746 if(NULL
== pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
)
748 pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
= tex_instruction_ptr
;
749 tex_instruction_ptr
->m_pLinkedGenericClause
= pAsm
->cf_current_tex_clause_ptr
;
752 AddTEXInstruction(pAsm
->pR700Shader
, tex_instruction_ptr
);
/*
 * assemble_vfetch_instruction: build (or rebuild) the vertex fetch
 * instruction for the vertex stream `gl_client_id`, writing the fetched
 * data to GPR `destination_register`.
 *
 * What the visible code does:
 *  - looks up pAsm->vfetch_instruction_ptr_array[gl_client_id]; when that
 *    slot is NULL a new R700VertexGenericFetch is allocated and
 *    initialised, otherwise the cached instruction is re-filled;
 *  - calls GetSurfaceFormat() to obtain data_format and the element size in
 *    bytes (client_size_inbyte);
 *  - depending on pFetchMethod->bEnableMini, sets up mega-fetch parameters
 *    (mega_fetch_count = MEGA_FETCH_BYTES - 1, flag = 1, and the remainder
 *    stored back into *pFetchMethod);
 *  - fills Word0, Word1, Word1_GPR and Word2 of the instruction; the
 *    destination selects use SQ_SEL_0 for missing x/y/z components and
 *    SQ_SEL_1 for a missing w component;
 *  - for a newly allocated instruction, adds it via
 *    add_vfetch_instruction() and stores it in the per-stream array.
 *
 * NOTE(review): the gl_client_id parameter, the data_format declaration,
 * the mini-fetch branch body and all return statements are absent from this
 * listing -- confirm against the full file.
 */
757 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
759 GLuint destination_register
,
760 GLuint number_of_elements
,
761 GLenum dataElementType
,
762 VTX_FETCH_METHOD
* pFetchMethod
)
764 GLuint client_size_inbyte
;
766 GLuint mega_fetch_count
;
767 GLuint is_mega_fetch_flag
;
769 R700VertexGenericFetch
* vfetch_instruction_ptr
;
770 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
= pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
];
772 if (assembled_vfetch_instruction_ptr
== NULL
)
774 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
775 if (vfetch_instruction_ptr
== NULL
)
779 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
783 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
786 data_format
= GetSurfaceFormat(dataElementType
, number_of_elements
, &client_size_inbyte
);
788 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
794 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
795 is_mega_fetch_flag
= 0x1;
796 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
799 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
800 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
801 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
803 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= gl_client_id
;
804 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
805 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
806 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
807 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
809 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (number_of_elements
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
810 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (number_of_elements
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
811 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (number_of_elements
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
812 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (number_of_elements
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
814 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
816 // Destination register
817 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
818 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
820 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
821 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
823 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
825 if (assembled_vfetch_instruction_ptr
== NULL
)
827 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
832 if (pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] != NULL
)
838 pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] = vfetch_instruction_ptr
;
/*
 * assemble_vfetch_instruction2: variant of assemble_vfetch_instruction that
 * takes explicit vertex element attributes (`element`, `type`, `size`,
 * `normalize` are referenced by the body) and writes the explicit format
 * fields of the fetch instruction.
 *
 * What the visible code does:
 *  - reuses the cached instruction in
 *    pAsm->vfetch_instruction_ptr_array[element] or allocates and
 *    initialises a new R700VertexGenericFetch;
 *  - calls GetSurfaceFormat(type, size, ...) for data_format and the
 *    element size in bytes;
 *  - sets up mega-fetch parameters depending on pFetchMethod->bEnableMini;
 *  - fills Word0/Word1/Word1_GPR/Word2, including data_format,
 *    endian_swap = SQ_ENDIAN_NONE, format_comp_all (signed or unsigned) and
 *    num_format_all (NORM when `normalize` is GL_TRUE, INT otherwise);
 *  - for a newly allocated instruction, adds it via
 *    add_vfetch_instruction() and stores it in the per-element array.
 *
 * NOTE(review): most of the parameter list (original lines 847-851), the
 * condition choosing signed vs. unsigned, and all return statements are
 * absent from this listing -- confirm against the full file.
 */
845 GLboolean
assemble_vfetch_instruction2(r700_AssemblerBase
* pAsm
,
846 GLuint destination_register
,
852 VTX_FETCH_METHOD
* pFetchMethod
)
854 GLuint client_size_inbyte
;
856 GLuint mega_fetch_count
;
857 GLuint is_mega_fetch_flag
;
859 R700VertexGenericFetch
* vfetch_instruction_ptr
;
860 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
861 = pAsm
->vfetch_instruction_ptr_array
[element
];
863 if (assembled_vfetch_instruction_ptr
== NULL
)
865 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
866 if (vfetch_instruction_ptr
== NULL
)
870 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
874 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
877 data_format
= GetSurfaceFormat(type
, size
, &client_size_inbyte
);
879 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
885 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
886 is_mega_fetch_flag
= 0x1;
887 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
890 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
891 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
892 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
894 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= element
;
895 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
896 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
897 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
898 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
900 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
901 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
902 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
903 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
905 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
906 vfetch_instruction_ptr
->m_Word1
.f
.data_format
= data_format
;
907 vfetch_instruction_ptr
->m_Word2
.f
.endian_swap
= SQ_ENDIAN_NONE
;
911 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_SIGNED
;
915 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_UNSIGNED
;
918 if(GL_TRUE
== normalize
)
920 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_NORM
;
924 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_INT
;
927 // Destination register
928 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
929 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
931 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
932 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
934 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
936 if (assembled_vfetch_instruction_ptr
== NULL
)
938 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
943 if (pAsm
->vfetch_instruction_ptr_array
[element
] != NULL
)
949 pAsm
->vfetch_instruction_ptr_array
[element
] = vfetch_instruction_ptr
;
956 GLboolean
cleanup_vfetch_instructions(r700_AssemblerBase
* pAsm
)
959 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
960 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
962 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
964 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
967 cleanup_vfetch_shaderinst(pAsm
->pR700Shader
);
972 GLuint
gethelpr(r700_AssemblerBase
* pAsm
)
974 GLuint r
= pAsm
->uHelpReg
;
976 if (pAsm
->uHelpReg
> pAsm
->number_used_registers
)
978 pAsm
->number_used_registers
= pAsm
->uHelpReg
;
982 void resethelpr(r700_AssemblerBase
* pAsm
)
984 pAsm
->uHelpReg
= pAsm
->uFirstHelpReg
;
/*
 * checkop_init: reset per-instruction operand substitution state.  The
 * only statement visible here sets pAsm->aArgSubst[3] to -1 ("no
 * substitution" -- assemble_src only substitutes entries that are >= 0).
 * NOTE(review): original source lines 988-992 are absent from this listing;
 * presumably they reset the other aArgSubst entries and the helper
 * registers -- confirm against the full file.
 */
987 void checkop_init(r700_AssemblerBase
* pAsm
)
993 pAsm
->aArgSubst
[3] = -1;
996 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
)
998 GLuint tmp
= gethelpr(pAsm
);
1000 //mov src to temp helper gpr.
1001 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
1003 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1005 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1006 pAsm
->D
.dst
.reg
= tmp
;
1008 nomask_PVSDST(&(pAsm
->D
.dst
));
1010 if( GL_FALSE
== assemble_src(pAsm
, src
, 0) )
1015 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
1016 noneg_PVSSRC(&(pAsm
->S
[0].src
));
1018 if( GL_FALSE
== next_ins(pAsm
) )
1023 pAsm
->aArgSubst
[1 + src
] = tmp
;
/*
 * checkop1: operand check for one-source instructions; returns a GLboolean.
 * NOTE(review): the function body is absent from this listing -- confirm
 * its behaviour against the full file.
 */
1028 GLboolean
checkop1(r700_AssemblerBase
* pAsm
)
1034 GLboolean
checkop2(r700_AssemblerBase
* pAsm
)
1036 GLboolean bSrcConst
[2];
1037 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1041 if( (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
1042 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
1043 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
1044 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
1046 bSrcConst
[0] = GL_TRUE
;
1050 bSrcConst
[0] = GL_FALSE
;
1052 if( (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1053 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1054 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1055 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1057 bSrcConst
[1] = GL_TRUE
;
1061 bSrcConst
[1] = GL_FALSE
;
1064 if( (bSrcConst
[0] == GL_TRUE
) && (bSrcConst
[1] == GL_TRUE
) )
1066 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1068 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1078 GLboolean
checkop3(r700_AssemblerBase
* pAsm
)
1080 GLboolean bSrcConst
[3];
1081 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1085 if( (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
1086 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
1087 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
1088 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
1090 bSrcConst
[0] = GL_TRUE
;
1094 bSrcConst
[0] = GL_FALSE
;
1096 if( (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1097 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1098 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1099 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1101 bSrcConst
[1] = GL_TRUE
;
1105 bSrcConst
[1] = GL_FALSE
;
1107 if( (pILInst
->SrcReg
[2].File
== PROGRAM_CONSTANT
) ||
1108 (pILInst
->SrcReg
[2].File
== PROGRAM_LOCAL_PARAM
) ||
1109 (pILInst
->SrcReg
[2].File
== PROGRAM_ENV_PARAM
) ||
1110 (pILInst
->SrcReg
[2].File
== PROGRAM_STATE_VAR
) )
1112 bSrcConst
[2] = GL_TRUE
;
1116 bSrcConst
[2] = GL_FALSE
;
1119 if( (GL_TRUE
== bSrcConst
[0]) &&
1120 (GL_TRUE
== bSrcConst
[1]) &&
1121 (GL_TRUE
== bSrcConst
[2]) )
1123 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1127 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1134 else if( (GL_TRUE
== bSrcConst
[0]) &&
1135 (GL_TRUE
== bSrcConst
[1]) )
1137 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1139 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1147 else if ( (GL_TRUE
== bSrcConst
[0]) &&
1148 (GL_TRUE
== bSrcConst
[2]) )
1150 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[2].Index
)
1152 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1160 else if( (GL_TRUE
== bSrcConst
[1]) &&
1161 (GL_TRUE
== bSrcConst
[2]) )
1163 if(pILInst
->SrcReg
[1].Index
!= pILInst
->SrcReg
[2].Index
)
1165 if( GL_FALSE
== mov_temp(pAsm
, 2) )
/*
 * assemble_src: translate IL source operand `src` of the current
 * instruction (pAsm->pILInst[pAsm->uiCurInst]) into the hardware source
 * slot pAsm->S[fld].src.
 *
 * What the visible code does:
 *  - when pAsm->aArgSubst[1+src] is >= 0, the operand is replaced by that
 *    temporary register (absolute addressing);
 *  - otherwise it switches on the source register file:
 *      PROGRAM_TEMPORARY -> temporary register,
 *                           Index + starting_temp_register_number;
 *      PROGRAM_CONSTANT / LOCAL_PARAM / ENV_PARAM / STATE_VAR / UNIFORM ->
 *                           constant register Index, addressed relative to
 *                           A0 when RelAddr is 1, absolute otherwise;
 *      a further file     -> input register, mapped through
 *                           uiFP_AttributeMap or ucVP_AttributeMap
 *                           depending on pAsm->currentShaderType;
 *      anything else      -> radeon_error("Invalid source argument type");
 *  - finally the 3-bit-per-component swizzle and the 1-bit-per-component
 *    negate mask are unpacked from SrcReg[src] into the slot.
 *
 * NOTE(review): the `src`/`fld` parameter declarations, the case labels of
 * the input-register branch and of the shader-type switch, and the return
 * statements are absent from this listing -- confirm against the full file.
 */
1177 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
1181 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1188 if(pAsm
->aArgSubst
[1+src
] >= 0)
1190 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1191 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1192 pAsm
->S
[fld
].src
.reg
= pAsm
->aArgSubst
[1+src
];
1196 switch (pILInst
->SrcReg
[src
].File
)
1198 case PROGRAM_TEMPORARY
:
1199 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1200 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1201 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
+ pAsm
->starting_temp_register_number
;
1203 case PROGRAM_CONSTANT
:
1204 case PROGRAM_LOCAL_PARAM
:
1205 case PROGRAM_ENV_PARAM
:
1206 case PROGRAM_STATE_VAR
:
1207 case PROGRAM_UNIFORM
:
1208 if (1 == pILInst
->SrcReg
[src
].RelAddr
)
1210 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_RELATIVE_A0
);
1214 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1217 pAsm
->S
[fld
].src
.rtype
= SRC_REG_CONSTANT
;
1218 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
;
1221 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1222 pAsm
->S
[fld
].src
.rtype
= SRC_REG_INPUT
;
1223 switch (pAsm
->currentShaderType
)
1226 pAsm
->S
[fld
].src
.reg
= pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1229 pAsm
->S
[fld
].src
.reg
= pAsm
->ucVP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1234 radeon_error("Invalid source argument type : %d \n", pILInst
->SrcReg
[src
].File
);
1239 pAsm
->S
[fld
].src
.swizzlex
= pILInst
->SrcReg
[src
].Swizzle
& 0x7;
1240 pAsm
->S
[fld
].src
.swizzley
= (pILInst
->SrcReg
[src
].Swizzle
>> 3) & 0x7;
1241 pAsm
->S
[fld
].src
.swizzlez
= (pILInst
->SrcReg
[src
].Swizzle
>> 6) & 0x7;
1242 pAsm
->S
[fld
].src
.swizzlew
= (pILInst
->SrcReg
[src
].Swizzle
>> 9) & 0x7;
1244 pAsm
->S
[fld
].src
.negx
= pILInst
->SrcReg
[src
].Negate
& 0x1;
1245 pAsm
->S
[fld
].src
.negy
= (pILInst
->SrcReg
[src
].Negate
>> 1) & 0x1;
1246 pAsm
->S
[fld
].src
.negz
= (pILInst
->SrcReg
[src
].Negate
>> 2) & 0x1;
1247 pAsm
->S
[fld
].src
.negw
= (pILInst
->SrcReg
[src
].Negate
>> 3) & 0x1;
/*
 * assemble_dst: translate the destination register of the current IL
 * instruction (pAsm->pILInst[pAsm->uiCurInst]) into pAsm->D.dst.
 *
 * What the visible code does, by destination register file:
 *  - PROGRAM_TEMPORARY -> DST_REG_TEMPORARY,
 *                         Index + starting_temp_register_number;
 *  - PROGRAM_ADDRESS   -> DST_REG_A0, register 0;
 *  - PROGRAM_OUTPUT    -> DST_REG_OUT, register looked up in
 *                         uiFP_OutputMap or ucVP_OutputMap depending on
 *                         pAsm->currentShaderType;
 *  - anything else     -> radeon_error("Invalid destination output
 *                         argument type").
 * Addressing is always absolute.  Afterwards bits 0..3 of the IL write mask
 * are unpacked into writex/writey/writez/writew.
 *
 * NOTE(review): the case labels of the shader-type switch, the break
 * statements and the return statements are absent from this listing --
 * confirm against the full file.
 */
1252 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
)
1254 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1255 switch (pILInst
->DstReg
.File
)
1257 case PROGRAM_TEMPORARY
:
1258 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1259 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1260 pAsm
->D
.dst
.reg
= pILInst
->DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1262 case PROGRAM_ADDRESS
:
1263 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1264 pAsm
->D
.dst
.rtype
= DST_REG_A0
;
1265 pAsm
->D
.dst
.reg
= 0;
1267 case PROGRAM_OUTPUT
:
1268 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1269 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1270 switch (pAsm
->currentShaderType
)
1273 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1276 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1281 radeon_error("Invalid destination output argument type\n");
1285 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1286 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1287 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1288 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
/*
 * tex_dst: set up pAsm->D.dst as the destination of a texture instruction
 * from the current IL instruction (pAsm->pILInst[pAsm->uiCurInst]).
 *
 * What the visible code does, by destination register file:
 *  - PROGRAM_TEMPORARY -> DST_REG_TEMPORARY,
 *                         Index + starting_temp_register_number;
 *  - PROGRAM_OUTPUT    -> DST_REG_OUT, register looked up in
 *                         uiFP_OutputMap or ucVP_OutputMap depending on
 *                         pAsm->currentShaderType;
 *  - anything else     -> radeon_error("Invalid destination output
 *                         argument type").
 * Addressing is always absolute.  Afterwards bits 0..3 of the IL write mask
 * are unpacked into writex/writey/writez/writew.
 *
 * NOTE(review): the case labels of the shader-type switch and the return
 * statements are absent from this listing -- confirm against the full file.
 */
1293 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
)
1295 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1297 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
1299 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1300 pAsm
->D
.dst
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1302 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1304 else if(PROGRAM_OUTPUT
== pILInst
->DstReg
.File
)
1306 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1307 switch (pAsm
->currentShaderType
)
1310 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1313 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1317 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1321 radeon_error("Invalid destination output argument type\n");
1325 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1326 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1327 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1328 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1333 GLboolean
tex_src(r700_AssemblerBase
*pAsm
)
1335 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1337 GLboolean bValidTexCoord
= GL_FALSE
;
1339 if(pAsm
->aArgSubst
[1] >= 0)
1341 bValidTexCoord
= GL_TRUE
;
1342 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1343 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1344 pAsm
->S
[0].src
.reg
= pAsm
->aArgSubst
[1];
1348 switch (pILInst
->SrcReg
[0].File
) {
1349 case PROGRAM_CONSTANT
:
1350 case PROGRAM_LOCAL_PARAM
:
1351 case PROGRAM_ENV_PARAM
:
1352 case PROGRAM_STATE_VAR
:
1354 case PROGRAM_TEMPORARY
:
1355 bValidTexCoord
= GL_TRUE
;
1356 pAsm
->S
[0].src
.reg
= pILInst
->SrcReg
[0].Index
+
1357 pAsm
->starting_temp_register_number
;
1358 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1361 switch (pILInst
->SrcReg
[0].Index
)
1363 case FRAG_ATTRIB_WPOS
:
1364 case FRAG_ATTRIB_COL0
:
1365 case FRAG_ATTRIB_COL1
:
1366 case FRAG_ATTRIB_FOGC
:
1367 case FRAG_ATTRIB_TEX0
:
1368 case FRAG_ATTRIB_TEX1
:
1369 case FRAG_ATTRIB_TEX2
:
1370 case FRAG_ATTRIB_TEX3
:
1371 case FRAG_ATTRIB_TEX4
:
1372 case FRAG_ATTRIB_TEX5
:
1373 case FRAG_ATTRIB_TEX6
:
1374 case FRAG_ATTRIB_TEX7
:
1375 bValidTexCoord
= GL_TRUE
;
1376 pAsm
->S
[0].src
.reg
=
1377 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1378 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
1380 case FRAG_ATTRIB_FACE
:
1381 fprintf(stderr
, "FRAG_ATTRIB_FACE unsupported\n");
1383 case FRAG_ATTRIB_PNTC
:
1384 fprintf(stderr
, "FRAG_ATTRIB_PNTC unsupported\n");
1388 if( (pILInst
->SrcReg
[0].Index
>= FRAG_ATTRIB_VAR0
) ||
1389 (pILInst
->SrcReg
[0].Index
< FRAG_ATTRIB_MAX
) )
1391 bValidTexCoord
= GL_TRUE
;
1392 pAsm
->S
[0].src
.reg
=
1393 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1394 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
1401 if(GL_TRUE
== bValidTexCoord
)
1403 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1407 radeon_error("Invalid source texcoord for TEX instruction\n");
1411 pAsm
->S
[0].src
.swizzlex
= pILInst
->SrcReg
[0].Swizzle
& 0x7;
1412 pAsm
->S
[0].src
.swizzley
= (pILInst
->SrcReg
[0].Swizzle
>> 3) & 0x7;
1413 pAsm
->S
[0].src
.swizzlez
= (pILInst
->SrcReg
[0].Swizzle
>> 6) & 0x7;
1414 pAsm
->S
[0].src
.swizzlew
= (pILInst
->SrcReg
[0].Swizzle
>> 9) & 0x7;
1416 pAsm
->S
[0].src
.negx
= pILInst
->SrcReg
[0].Negate
& 0x1;
1417 pAsm
->S
[0].src
.negy
= (pILInst
->SrcReg
[0].Negate
>> 1) & 0x1;
1418 pAsm
->S
[0].src
.negz
= (pILInst
->SrcReg
[0].Negate
>> 2) & 0x1;
1419 pAsm
->S
[0].src
.negw
= (pILInst
->SrcReg
[0].Negate
>> 3) & 0x1;
1424 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
)
1426 PVSSRC
* texture_coordinate_source
;
1427 PVSSRC
* texture_unit_source
;
1429 R700TextureInstruction
* tex_instruction_ptr
= (R700TextureInstruction
*) CALLOC_STRUCT(R700TextureInstruction
);
1430 if (tex_instruction_ptr
== NULL
)
1434 Init_R700TextureInstruction(tex_instruction_ptr
);
1436 texture_coordinate_source
= &(pAsm
->S
[0].src
);
1437 texture_unit_source
= &(pAsm
->S
[1].src
);
1439 tex_instruction_ptr
->m_Word0
.f
.tex_inst
= pAsm
->D
.dst
.opcode
;
1440 tex_instruction_ptr
->m_Word0
.f
.bc_frac_mode
= 0x0;
1441 tex_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
1443 tex_instruction_ptr
->m_Word0
.f
.resource_id
= texture_unit_source
->reg
;
1445 tex_instruction_ptr
->m_Word1
.f
.lod_bias
= 0x0;
1447 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_NORMALIZED
;
1448 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_NORMALIZED
;
1449 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_NORMALIZED
;
1450 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_NORMALIZED
;
1452 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1453 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_UNNORMALIZED
;
1454 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_UNNORMALIZED
;
1455 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_UNNORMALIZED
;
1456 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_UNNORMALIZED
;
1459 tex_instruction_ptr
->m_Word2
.f
.offset_x
= 0x0;
1460 tex_instruction_ptr
->m_Word2
.f
.offset_y
= 0x0;
1461 tex_instruction_ptr
->m_Word2
.f
.offset_z
= 0x0;
1463 tex_instruction_ptr
->m_Word2
.f
.sampler_id
= texture_unit_source
->reg
;
1466 if ( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
1467 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
1469 tex_instruction_ptr
->m_Word0
.f
.src_gpr
= texture_coordinate_source
->reg
;
1470 tex_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
1472 tex_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
1473 tex_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
;
1475 tex_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (pAsm
->D
.dst
.writex
? texture_unit_source
->swizzlex
: SQ_SEL_MASK
);
1476 tex_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (pAsm
->D
.dst
.writey
? texture_unit_source
->swizzley
: SQ_SEL_MASK
);
1477 tex_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (pAsm
->D
.dst
.writez
? texture_unit_source
->swizzlez
: SQ_SEL_MASK
);
1478 tex_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (pAsm
->D
.dst
.writew
? texture_unit_source
->swizzlew
: SQ_SEL_MASK
);
1481 tex_instruction_ptr
->m_Word2
.f
.src_sel_x
= texture_coordinate_source
->swizzlex
;
1482 tex_instruction_ptr
->m_Word2
.f
.src_sel_y
= texture_coordinate_source
->swizzley
;
1483 tex_instruction_ptr
->m_Word2
.f
.src_sel_z
= texture_coordinate_source
->swizzlez
;
1484 tex_instruction_ptr
->m_Word2
.f
.src_sel_w
= texture_coordinate_source
->swizzlew
;
1488 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1492 if( GL_FALSE
== add_tex_instruction(pAsm
, tex_instruction_ptr
) )
1500 void initialize(r700_AssemblerBase
*pAsm
)
1502 GLuint cycle
, component
;
1504 for (cycle
=0; cycle
<NUMBER_OF_CYCLES
; cycle
++)
1506 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1508 pAsm
->hw_gpr
[cycle
][component
] = (-1);
1511 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1513 pAsm
->hw_cfile_addr
[component
] = (-1);
1514 pAsm
->hw_cfile_chan
[component
] = (-1);
1518 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
1521 BITS scalar_channel_index
)
1528 //--------------------------------------------------------------------------
1529 // Source for operands src0, src1.
1530 // Values [0,127] correspond to GPR[0..127].
1531 // Values [256,511] correspond to cfile constants c[0..255].
1533 //--------------------------------------------------------------------------
1534 // Other special values are shown in the list below.
1536 // 248 SQ_ALU_SRC_0: special constant 0.0.
1537 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1539 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1540 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1542 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1543 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1545 // 254 SQ_ALU_SRC_PV: previous vector result.
1546 // 255 SQ_ALU_SRC_PS: previous scalar result.
1547 //--------------------------------------------------------------------------
1549 BITS channel_swizzle
;
1550 switch (scalar_channel_index
)
1552 case 0: channel_swizzle
= pSource
->swizzlex
; break;
1553 case 1: channel_swizzle
= pSource
->swizzley
; break;
1554 case 2: channel_swizzle
= pSource
->swizzlez
; break;
1555 case 3: channel_swizzle
= pSource
->swizzlew
; break;
1556 default: channel_swizzle
= SQ_SEL_MASK
; break;
1559 if(channel_swizzle
== SQ_SEL_0
)
1561 src_sel
= SQ_ALU_SRC_0
;
1563 else if (channel_swizzle
== SQ_SEL_1
)
1565 src_sel
= SQ_ALU_SRC_1
;
1569 if ( (pSource
->rtype
== SRC_REG_TEMPORARY
) ||
1570 (pSource
->rtype
== SRC_REG_INPUT
)
1573 src_sel
= pSource
->reg
;
1575 else if (pSource
->rtype
== SRC_REG_CONSTANT
)
1577 src_sel
= pSource
->reg
+ CFILE_REGISTER_OFFSET
;
1579 else if (pSource
->rtype
== SRC_REC_LITERAL
)
1581 src_sel
= SQ_ALU_SRC_LITERAL
;
1585 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1586 source_index
, pSource
->rtype
);
1591 if( ADDR_ABSOLUTE
== addrmode_PVSSRC(pSource
) )
1593 src_rel
= SQ_ABSOLUTE
;
1597 src_rel
= SQ_RELATIVE
;
1600 switch (channel_swizzle
)
1603 src_chan
= SQ_CHAN_X
;
1606 src_chan
= SQ_CHAN_Y
;
1609 src_chan
= SQ_CHAN_Z
;
1612 src_chan
= SQ_CHAN_W
;
1616 // Does not matter since src_sel controls
1617 src_chan
= SQ_CHAN_X
;
1620 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle
);
1625 switch (scalar_channel_index
)
1627 case 0: src_neg
= pSource
->negx
; break;
1628 case 1: src_neg
= pSource
->negy
; break;
1629 case 2: src_neg
= pSource
->negz
; break;
1630 case 3: src_neg
= pSource
->negw
; break;
1631 default: src_neg
= 0; break;
1634 switch (source_index
)
1637 alu_instruction_ptr
->m_Word0
.f
.src0_sel
= src_sel
;
1638 alu_instruction_ptr
->m_Word0
.f
.src0_rel
= src_rel
;
1639 alu_instruction_ptr
->m_Word0
.f
.src0_chan
= src_chan
;
1640 alu_instruction_ptr
->m_Word0
.f
.src0_neg
= src_neg
;
1643 alu_instruction_ptr
->m_Word0
.f
.src1_sel
= src_sel
;
1644 alu_instruction_ptr
->m_Word0
.f
.src1_rel
= src_rel
;
1645 alu_instruction_ptr
->m_Word0
.f
.src1_chan
= src_chan
;
1646 alu_instruction_ptr
->m_Word0
.f
.src1_neg
= src_neg
;
1649 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
= src_sel
;
1650 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
= src_rel
;
1651 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
= src_chan
;
1652 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
= src_neg
;
1655 radeon_error("Only three sources allowed in ALU opcodes.\n");
1663 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
1664 R700ALUInstruction
* alu_instruction_ptr
,
1665 GLuint contiguous_slots_needed
)
1667 if( GL_FALSE
== check_current_clause(pAsm
, CF_ALU_CLAUSE
) )
1672 if ( pAsm
->alu_x_opcode
!= 0 ||
1673 pAsm
->cf_current_alu_clause_ptr
== NULL
||
1674 ( (pAsm
->cf_current_alu_clause_ptr
!= NULL
) &&
1675 (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-contiguous_slots_needed
-1) )
1679 //new cf inst for this clause
1680 pAsm
->cf_current_alu_clause_ptr
= (R700ControlFlowALUClause
*) CALLOC_STRUCT(R700ControlFlowALUClause
);
1682 // link the new cf to cf segment
1683 if(NULL
!= pAsm
->cf_current_alu_clause_ptr
)
1685 Init_R700ControlFlowALUClause(pAsm
->cf_current_alu_clause_ptr
);
1686 AddCFInstruction( pAsm
->pR700Shader
,
1687 (R700ControlFlowInstruction
*)pAsm
->cf_current_alu_clause_ptr
);
1691 radeon_error("Could not allocate a new ALU CF instruction.\n");
1695 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank0
= 0x0;
1696 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank1
= 0x0;
1697 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_mode0
= SQ_CF_KCACHE_NOP
;
1699 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_mode1
= SQ_CF_KCACHE_NOP
;
1700 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr0
= 0x0;
1701 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr1
= 0x0;
1703 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
= 0x0;
1705 if(pAsm
->alu_x_opcode
!= 0)
1707 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= pAsm
->alu_x_opcode
;
1708 pAsm
->alu_x_opcode
= 0;
1712 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ALU
;
1715 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
1717 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
1721 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
++;
1724 // If this clause constains any instruction that is forward dependent on a TEX instruction,
1725 // set the whole_quad_mode for this clause
1726 if ( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) )
1728 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x1;
1731 if (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-1) )
1733 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
1736 if(NULL
== pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
)
1738 pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
= alu_instruction_ptr
;
1739 alu_instruction_ptr
->m_pLinkedALUClause
= pAsm
->cf_current_alu_clause_ptr
;
1742 AddALUInstruction(pAsm
->pR700Shader
, alu_instruction_ptr
);
1747 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
1754 switch (source_index
)
1757 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src0_sel
;
1758 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src0_rel
;
1759 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src0_chan
;
1760 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src0_neg
;
1764 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src1_sel
;
1765 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src1_rel
;
1766 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src1_chan
;
1767 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src1_neg
;
1771 *psrc_sel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
;
1772 *psrc_rel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
;
1773 *psrc_chan
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
;
1774 *psrc_neg
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
;
1779 int is_cfile(BITS sel
)
1781 if (sel
> 255 && sel
< 512)
1788 int is_const(BITS sel
)
1794 else if(sel
>= SQ_ALU_SRC_0
&& sel
<= SQ_ALU_SRC_LITERAL
)
1801 int is_gpr(BITS sel
)
1803 if (sel
>= 0 && sel
< 128)
1810 const GLuint BANK_SWIZZLE_VEC
[8] = {SQ_ALU_VEC_210
, //000
1811 SQ_ALU_VEC_120
, //001
1812 SQ_ALU_VEC_102
, //010
1814 SQ_ALU_VEC_201
, //011
1815 SQ_ALU_VEC_012
, //100
1816 SQ_ALU_VEC_021
, //101
1818 SQ_ALU_VEC_012
, //110
1819 SQ_ALU_VEC_012
}; //111
1821 const GLuint BANK_SWIZZLE_SCL
[8] = {SQ_ALU_SCL_210
, //000
1822 SQ_ALU_SCL_122
, //001
1823 SQ_ALU_SCL_122
, //010
1825 SQ_ALU_SCL_221
, //011
1826 SQ_ALU_SCL_212
, //100
1827 SQ_ALU_SCL_122
, //101
1829 SQ_ALU_SCL_122
, //110
1830 SQ_ALU_SCL_122
}; //111
1832 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
1836 int res_match
= (-1);
1837 int res_empty
= (-1);
1841 for (res
=3; res
>=0; res
--)
1843 if(pAsm
->hw_cfile_addr
[ res
] < 0)
1847 else if( (pAsm
->hw_cfile_addr
[res
] == (int)sel
)
1849 (pAsm
->hw_cfile_chan
[ res
] == (int) chan
) )
1857 // Read for this scalar component already reserved, nothing to do here.
1860 else if(res_empty
>= 0)
1862 pAsm
->hw_cfile_addr
[ res_empty
] = sel
;
1863 pAsm
->hw_cfile_chan
[ res_empty
] = chan
;
1867 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1873 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
)
1875 if(pAsm
->hw_gpr
[cycle
][chan
] < 0)
1877 pAsm
->hw_gpr
[cycle
][chan
] = sel
;
1879 else if(pAsm
->hw_gpr
[cycle
][chan
] != (int)sel
)
1881 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1888 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
1892 case SQ_ALU_SCL_210
:
1894 int table
[3] = {2, 1, 0};
1895 *pCycle
= table
[sel
];
1899 case SQ_ALU_SCL_122
:
1901 int table
[3] = {1, 2, 2};
1902 *pCycle
= table
[sel
];
1906 case SQ_ALU_SCL_212
:
1908 int table
[3] = {2, 1, 2};
1909 *pCycle
= table
[sel
];
1913 case SQ_ALU_SCL_221
:
1915 int table
[3] = {2, 2, 1};
1916 *pCycle
= table
[sel
];
1921 radeon_error("Bad Scalar bank swizzle value\n");
1928 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
1932 case SQ_ALU_VEC_012
:
1934 int table
[3] = {0, 1, 2};
1935 *pCycle
= table
[sel
];
1938 case SQ_ALU_VEC_021
:
1940 int table
[3] = {0, 2, 1};
1941 *pCycle
= table
[sel
];
1944 case SQ_ALU_VEC_120
:
1946 int table
[3] = {1, 2, 0};
1947 *pCycle
= table
[sel
];
1950 case SQ_ALU_VEC_102
:
1952 int table
[3] = {1, 0, 2};
1953 *pCycle
= table
[sel
];
1956 case SQ_ALU_VEC_201
:
1958 int table
[3] = {2, 0, 1};
1959 *pCycle
= table
[sel
];
1962 case SQ_ALU_VEC_210
:
1964 int table
[3] = {2, 1, 0};
1965 *pCycle
= table
[sel
];
1969 radeon_error("Bad Vec bank swizzle value\n");
1977 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
1978 R700ALUInstruction
* alu_instruction_ptr
)
1981 GLuint bank_swizzle
;
1982 GLuint const_count
= 0;
1991 BITS src_sel
[3] = {0,0,0};
1992 BITS src_chan
[3] = {0,0,0};
1993 BITS src_rel
[3] = {0,0,0};
1994 BITS src_neg
[3] = {0,0,0};
1998 GLuint number_of_operands
= r700GetNumOperands(pAsm
);
2000 for (src
=0; src
<number_of_operands
; src
++)
2002 get_src_properties(alu_instruction_ptr
,
2011 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
2012 (is_const( src_sel
[1] ) ? 2 : 0) +
2013 (is_const( src_sel
[2] ) ? 1 : 0) );
2015 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_SCL
[ swizzle_key
];
2017 for (src
=0; src
<number_of_operands
; src
++)
2019 sel
= src_sel
[src
];
2020 chan
= src_chan
[src
];
2021 rel
= src_rel
[src
];
2022 neg
= src_neg
[src
];
2024 if (is_const( sel
))
2026 // Any constant, including literal and inline constants
2029 if (is_cfile( sel
))
2031 reserve_cfile(pAsm
, sel
, chan
);
2037 for (src
=0; src
<number_of_operands
; src
++)
2039 sel
= src_sel
[src
];
2040 chan
= src_chan
[src
];
2041 rel
= src_rel
[src
];
2042 neg
= src_neg
[src
];
2046 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
2048 if( GL_FALSE
== cycle_for_scalar_bank_swizzle(bank_swizzle
, src
, &cycle
) )
2053 if(cycle
< const_count
)
2055 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
2066 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
2067 R700ALUInstruction
* alu_instruction_ptr
)
2070 GLuint bank_swizzle
;
2071 GLuint const_count
= 0;
2080 BITS src_sel
[3] = {0,0,0};
2081 BITS src_chan
[3] = {0,0,0};
2082 BITS src_rel
[3] = {0,0,0};
2083 BITS src_neg
[3] = {0,0,0};
2087 GLuint number_of_operands
= r700GetNumOperands(pAsm
);
2089 for (src
=0; src
<number_of_operands
; src
++)
2091 get_src_properties(alu_instruction_ptr
,
2100 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
2101 (is_const( src_sel
[1] ) ? 2 : 0) +
2102 (is_const( src_sel
[2] ) ? 1 : 0)
2105 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_VEC
[swizzle_key
];
2107 for (src
=0; src
<number_of_operands
; src
++)
2109 sel
= src_sel
[src
];
2110 chan
= src_chan
[src
];
2111 rel
= src_rel
[src
];
2112 neg
= src_neg
[src
];
2115 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
2119 if( GL_FALSE
== cycle_for_vector_bank_swizzle(bank_swizzle
, src
, &cycle
) )
2125 (sel
== src_sel
[0]) &&
2126 (chan
== src_chan
[0]) )
2131 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
2137 else if( is_const(sel
) )
2143 if( GL_FALSE
== reserve_cfile(pAsm
, sel
, chan
) )
2154 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
)
2156 GLuint number_of_scalar_operations
;
2157 GLboolean is_single_scalar_operation
;
2158 GLuint scalar_channel_index
;
2160 PVSSRC
* pcurrent_source
;
2161 int current_source_index
;
2162 GLuint contiguous_slots_needed
;
2164 GLuint uNumSrc
= r700GetNumOperands(pAsm
);
2165 //GLuint channel_swizzle, j;
2166 //GLuint chan_counter[4] = {0, 0, 0, 0};
2167 //PVSSRC * pSource[3];
2168 GLboolean bSplitInst
= GL_FALSE
;
2170 if (1 == pAsm
->D
.dst
.math
)
2172 is_single_scalar_operation
= GL_TRUE
;
2173 number_of_scalar_operations
= 1;
2177 is_single_scalar_operation
= GL_FALSE
;
2178 number_of_scalar_operations
= 4;
2180 /* current assembler doesn't do more than 1 register per source */
2182 /* check read port, only very preliminary algorithm, not count in
2183 src0/1 same comp case and prev slot repeat case; also not count relative
2184 addressing. TODO: improve performance. */
2185 for(j
=0; j
<uNumSrc
; j
++)
2187 pSource
[j
] = &(pAsm
->S
[j
].src
);
2189 for(scalar_channel_index
=0; scalar_channel_index
<4; scalar_channel_index
++)
2191 for(j
=0; j
<uNumSrc
; j
++)
2193 switch (scalar_channel_index
)
2195 case 0: channel_swizzle
= pSource
[j
]->swizzlex
; break;
2196 case 1: channel_swizzle
= pSource
[j
]->swizzley
; break;
2197 case 2: channel_swizzle
= pSource
[j
]->swizzlez
; break;
2198 case 3: channel_swizzle
= pSource
[j
]->swizzlew
; break;
2199 default: channel_swizzle
= SQ_SEL_MASK
; break;
2201 if ( ((pSource
[j
]->rtype
== SRC_REG_TEMPORARY
) ||
2202 (pSource
[j
]->rtype
== SRC_REG_INPUT
))
2203 && (channel_swizzle
<= SQ_SEL_W
) )
2205 chan_counter
[channel_swizzle
]++;
2209 if( (chan_counter
[SQ_SEL_X
] > 3)
2210 || (chan_counter
[SQ_SEL_Y
] > 3)
2211 || (chan_counter
[SQ_SEL_Z
] > 3)
2212 || (chan_counter
[SQ_SEL_W
] > 3) ) /* each chan bank has only 3 ports. */
2214 bSplitInst
= GL_TRUE
;
2219 contiguous_slots_needed
= 0;
2221 if(GL_TRUE
== is_reduction_opcode(&(pAsm
->D
)) )
2223 contiguous_slots_needed
= 4;
2228 for (scalar_channel_index
=0;
2229 scalar_channel_index
< number_of_scalar_operations
;
2230 scalar_channel_index
++)
2232 R700ALUInstruction
* alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2233 if (alu_instruction_ptr
== NULL
)
2237 Init_R700ALUInstruction(alu_instruction_ptr
);
2240 current_source_index
= 0;
2241 pcurrent_source
= &(pAsm
->S
[0].src
);
2243 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2244 current_source_index
,
2246 scalar_channel_index
) )
2254 current_source_index
= 1;
2255 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2257 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2258 current_source_index
,
2260 scalar_channel_index
) )
2267 alu_instruction_ptr
->m_Word0
.f
.index_mode
= SQ_INDEX_AR_X
;
2269 if( (is_single_scalar_operation
== GL_TRUE
)
2270 || (GL_TRUE
== bSplitInst
) )
2272 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
2276 alu_instruction_ptr
->m_Word0
.f
.last
= (scalar_channel_index
== 3) ? 1 : 0;
2279 alu_instruction_ptr
->m_Word0
.f
.pred_sel
= 0x0;
2280 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x0;
2281 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x0;
2284 if( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
2285 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
2287 alu_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
2291 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2295 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
; //D.rtype
2297 if ( is_single_scalar_operation
== GL_TRUE
)
2299 // Override scalar_channel_index since only one scalar value will be written
2300 if(pAsm
->D
.dst
.writex
)
2302 scalar_channel_index
= 0;
2304 else if(pAsm
->D
.dst
.writey
)
2306 scalar_channel_index
= 1;
2308 else if(pAsm
->D
.dst
.writez
)
2310 scalar_channel_index
= 2;
2312 else if(pAsm
->D
.dst
.writew
)
2314 scalar_channel_index
= 3;
2318 alu_instruction_ptr
->m_Word1
.f
.dst_chan
= scalar_channel_index
;
2320 alu_instruction_ptr
->m_Word1
.f
.clamp
= pAsm
->pILInst
[pAsm
->uiCurInst
].SaturateMode
;
2322 if (pAsm
->D
.dst
.op3
)
2326 alu_instruction_ptr
->m_Word1_OP3
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2328 //There's 3rd src for op3
2329 current_source_index
= 2;
2330 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2332 if ( GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2333 current_source_index
,
2335 scalar_channel_index
) )
2345 alu_instruction_ptr
->m_Word1_OP2
.f6
.alu_inst
= pAsm
->D
.dst
.opcode
;
2347 alu_instruction_ptr
->m_Word1_OP2
.f6
.src0_abs
= 0x0;
2348 alu_instruction_ptr
->m_Word1_OP2
.f6
.src1_abs
= 0x0;
2350 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2351 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2352 switch (scalar_channel_index
)
2355 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writex
;
2358 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writey
;
2361 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writez
;
2364 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writew
;
2367 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= 1; //SQ_SEL_MASK;
2370 alu_instruction_ptr
->m_Word1_OP2
.f6
.omod
= SQ_ALU_OMOD_OFF
;
2374 alu_instruction_ptr
->m_Word1_OP2
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2376 alu_instruction_ptr
->m_Word1_OP2
.f
.src0_abs
= 0x0;
2377 alu_instruction_ptr
->m_Word1_OP2
.f
.src1_abs
= 0x0;
2379 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2380 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2381 switch (scalar_channel_index
)
2384 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writex
;
2387 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writey
;
2390 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writez
;
2393 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writew
;
2396 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= 1; //SQ_SEL_MASK;
2399 alu_instruction_ptr
->m_Word1_OP2
.f
.omod
= SQ_ALU_OMOD_OFF
;
2403 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, contiguous_slots_needed
) )
2409 * Judge the type of current instruction, is it vector or scalar
2412 if (is_single_scalar_operation
)
2414 if(GL_FALSE
== check_scalar(pAsm
, alu_instruction_ptr
) )
2421 if(GL_FALSE
== check_vector(pAsm
, alu_instruction_ptr
) )
2427 contiguous_slots_needed
= 0;
2433 GLboolean
assemble_alu_instruction2(r700_AssemblerBase
*pAsm
)
2435 GLuint number_of_scalar_operations
;
2436 GLboolean is_single_scalar_operation
;
2437 GLuint scalar_channel_index
;
2439 PVSSRC
* pcurrent_source
;
2440 int current_source_index
;
2441 GLuint contiguous_slots_needed
;
2443 GLuint uNumSrc
= r700GetNumOperands(pAsm
);
2445 GLboolean bSplitInst
= GL_FALSE
;
2447 if (1 == pAsm
->D
.dst
.math
)
2449 is_single_scalar_operation
= GL_TRUE
;
2450 number_of_scalar_operations
= 1;
2454 is_single_scalar_operation
= GL_FALSE
;
2455 number_of_scalar_operations
= 4;
2458 contiguous_slots_needed
= 0;
2460 if(GL_TRUE
== is_reduction_opcode(&(pAsm
->D
)) )
2462 contiguous_slots_needed
= 4;
2467 for (scalar_channel_index
=0;
2468 scalar_channel_index
< number_of_scalar_operations
;
2469 scalar_channel_index
++)
2471 R700ALUInstruction
* alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2472 if (alu_instruction_ptr
== NULL
)
2476 Init_R700ALUInstruction(alu_instruction_ptr
);
2479 current_source_index
= 0;
2480 pcurrent_source
= &(pAsm
->S
[0].src
);
2482 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2483 current_source_index
,
2485 scalar_channel_index
) )
2493 current_source_index
= 1;
2494 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2496 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2497 current_source_index
,
2499 scalar_channel_index
) )
2506 alu_instruction_ptr
->m_Word0
.f
.index_mode
= SQ_INDEX_LOOP
;
2508 if( (is_single_scalar_operation
== GL_TRUE
)
2509 || (GL_TRUE
== bSplitInst
) )
2511 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
2515 alu_instruction_ptr
->m_Word0
.f
.last
= (scalar_channel_index
== 3) ? 1 : 0;
2518 alu_instruction_ptr
->m_Word0
.f
.pred_sel
= (pAsm
->D
.dst
.pred_inv
> 0) ? 1 : 0;
2519 if(1 == pAsm
->D
.dst
.predicated
)
2521 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x1;
2522 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x1;
2526 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x0;
2527 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x0;
2531 if( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
2532 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
2534 alu_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
2538 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2542 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
; //D.rtype
2544 if ( is_single_scalar_operation
== GL_TRUE
)
2546 // Override scalar_channel_index since only one scalar value will be written
2547 if(pAsm
->D
.dst
.writex
)
2549 scalar_channel_index
= 0;
2551 else if(pAsm
->D
.dst
.writey
)
2553 scalar_channel_index
= 1;
2555 else if(pAsm
->D
.dst
.writez
)
2557 scalar_channel_index
= 2;
2559 else if(pAsm
->D
.dst
.writew
)
2561 scalar_channel_index
= 3;
2565 alu_instruction_ptr
->m_Word1
.f
.dst_chan
= scalar_channel_index
;
2567 alu_instruction_ptr
->m_Word1
.f
.clamp
= pAsm
->D2
.dst2
.SaturateMode
;
2569 if (pAsm
->D
.dst
.op3
)
2573 alu_instruction_ptr
->m_Word1_OP3
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2575 //There's 3rd src for op3
2576 current_source_index
= 2;
2577 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2579 if ( GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2580 current_source_index
,
2582 scalar_channel_index
) )
2592 alu_instruction_ptr
->m_Word1_OP2
.f6
.alu_inst
= pAsm
->D
.dst
.opcode
;
2594 alu_instruction_ptr
->m_Word1_OP2
.f6
.src0_abs
= 0x0;
2595 alu_instruction_ptr
->m_Word1_OP2
.f6
.src1_abs
= 0x0;
2597 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2598 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2599 switch (scalar_channel_index
)
2602 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writex
;
2605 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writey
;
2608 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writez
;
2611 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writew
;
2614 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= 1; //SQ_SEL_MASK;
2617 alu_instruction_ptr
->m_Word1_OP2
.f6
.omod
= SQ_ALU_OMOD_OFF
;
2621 alu_instruction_ptr
->m_Word1_OP2
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2623 alu_instruction_ptr
->m_Word1_OP2
.f
.src0_abs
= 0x0;
2624 alu_instruction_ptr
->m_Word1_OP2
.f
.src1_abs
= 0x0;
2626 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2627 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2628 switch (scalar_channel_index
)
2631 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writex
;
2634 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writey
;
2637 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writez
;
2640 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writew
;
2643 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= 1; //SQ_SEL_MASK;
2646 alu_instruction_ptr
->m_Word1_OP2
.f
.omod
= SQ_ALU_OMOD_OFF
;
2650 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, contiguous_slots_needed
) )
2656 * Judge the type of current instruction, is it vector or scalar
2659 if (is_single_scalar_operation
)
2661 if(GL_FALSE
== check_scalar(pAsm
, alu_instruction_ptr
) )
2668 if(GL_FALSE
== check_vector(pAsm
, alu_instruction_ptr
) )
2674 contiguous_slots_needed
= 0;
2680 GLboolean
assemble_alu_instruction_literal(r700_AssemblerBase
*pAsm
, GLfloat
* pLiteral
)
2682 R700ALUInstruction
* alu_instruction_ptr
;
2683 R700ALUInstructionHalfLiteral
* alu_instruction_ptr_hl
;
2684 R700ALUInstructionFullLiteral
* alu_instruction_ptr_fl
;
2686 GLuint number_of_scalar_operations
;
2687 GLboolean is_single_scalar_operation
;
2688 GLuint scalar_channel_index
;
2690 GLuint contiguous_slots_needed
;
2691 GLuint lastInstruction
;
2692 GLuint not_masked
[4];
2694 GLuint uNumSrc
= r700GetNumOperands(pAsm
);
2696 GLboolean bSplitInst
= GL_FALSE
;
2698 number_of_scalar_operations
= 0;
2699 contiguous_slots_needed
= 0;
2701 if(1 == pAsm
->D
.dst
.writew
)
2703 lastInstruction
= 3;
2704 number_of_scalar_operations
++;
2711 if(1 == pAsm
->D
.dst
.writez
)
2713 lastInstruction
= 2;
2714 number_of_scalar_operations
++;
2721 if(1 == pAsm
->D
.dst
.writey
)
2723 lastInstruction
= 1;
2724 number_of_scalar_operations
++;
2731 if(1 == pAsm
->D
.dst
.writex
)
2733 lastInstruction
= 0;
2734 number_of_scalar_operations
++;
2742 if(GL_TRUE
== is_reduction_opcode(&(pAsm
->D
)) )
2744 contiguous_slots_needed
= 4;
2748 contiguous_slots_needed
= number_of_scalar_operations
;
2751 if(1 == pAsm
->D2
.dst2
.literal
)
2753 contiguous_slots_needed
+= 1;
2755 else if(2 == pAsm
->D2
.dst2
.literal
)
2757 contiguous_slots_needed
+= 2;
2762 for (scalar_channel_index
=0; scalar_channel_index
< 4; scalar_channel_index
++)
2764 if(0 == not_masked
[scalar_channel_index
])
2769 if(scalar_channel_index
== lastInstruction
)
2771 switch (pAsm
->D2
.dst2
.literal
)
2774 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2775 if (alu_instruction_ptr
== NULL
)
2779 Init_R700ALUInstruction(alu_instruction_ptr
);
2782 alu_instruction_ptr_hl
= (R700ALUInstructionHalfLiteral
*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral
);
2783 if (alu_instruction_ptr_hl
== NULL
)
2787 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl
, pLiteral
[0], pLiteral
[1]);
2788 alu_instruction_ptr
= (R700ALUInstruction
*)alu_instruction_ptr_hl
;
2791 alu_instruction_ptr_fl
= (R700ALUInstructionFullLiteral
*) CALLOC_STRUCT(R700ALUInstructionFullLiteral
);
2792 if (alu_instruction_ptr_fl
== NULL
)
2796 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl
, pLiteral
[0], pLiteral
[1], pLiteral
[2], pLiteral
[3]);
2797 alu_instruction_ptr
= (R700ALUInstruction
*)alu_instruction_ptr_fl
;
2805 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2806 if (alu_instruction_ptr
== NULL
)
2810 Init_R700ALUInstruction(alu_instruction_ptr
);
2814 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2817 scalar_channel_index
) )
2825 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2828 scalar_channel_index
) )
2835 alu_instruction_ptr
->m_Word0
.f
.index_mode
= SQ_INDEX_LOOP
;
2837 if(scalar_channel_index
== lastInstruction
)
2839 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
2842 alu_instruction_ptr
->m_Word0
.f
.pred_sel
= 0x0;
2843 if(1 == pAsm
->D
.dst
.predicated
)
2845 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x1;
2846 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x1;
2850 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0;
2851 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0;
2855 if( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
2856 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
2858 alu_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
2862 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2866 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
; //D.rtype
2868 alu_instruction_ptr
->m_Word1
.f
.dst_chan
= scalar_channel_index
;
2870 alu_instruction_ptr
->m_Word1
.f
.clamp
= pAsm
->D2
.dst2
.SaturateMode
;
2872 if (pAsm
->D
.dst
.op3
)
2875 alu_instruction_ptr
->m_Word1_OP3
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2877 //There's 3rd src for op3
2878 if ( GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2881 scalar_channel_index
) )
2891 alu_instruction_ptr
->m_Word1_OP2
.f6
.alu_inst
= pAsm
->D
.dst
.opcode
;
2892 alu_instruction_ptr
->m_Word1_OP2
.f6
.src0_abs
= 0x0;
2893 alu_instruction_ptr
->m_Word1_OP2
.f6
.src1_abs
= 0x0;
2894 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= 1;
2895 alu_instruction_ptr
->m_Word1_OP2
.f6
.omod
= SQ_ALU_OMOD_OFF
;
2899 alu_instruction_ptr
->m_Word1_OP2
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2900 alu_instruction_ptr
->m_Word1_OP2
.f
.src0_abs
= 0x0;
2901 alu_instruction_ptr
->m_Word1_OP2
.f
.src1_abs
= 0x0;
2902 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= 1;
2903 alu_instruction_ptr
->m_Word1_OP2
.f
.omod
= SQ_ALU_OMOD_OFF
;
2907 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, contiguous_slots_needed
) )
2912 if (1 == number_of_scalar_operations
)
2914 if(GL_FALSE
== check_scalar(pAsm
, alu_instruction_ptr
) )
2921 if(GL_FALSE
== check_vector(pAsm
, alu_instruction_ptr
) )
2927 contiguous_slots_needed
-= 2;
2933 GLboolean
next_ins(r700_AssemblerBase
*pAsm
)
2935 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
2937 if( GL_TRUE
== pAsm
->is_tex
)
2939 if (pILInst
->TexSrcTarget
== TEXTURE_RECT_INDEX
) {
2940 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_FALSE
) )
2942 radeon_error("Error assembling TEX instruction\n");
2946 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_TRUE
) )
2948 radeon_error("Error assembling TEX instruction\n");
2955 if( GL_FALSE
== assemble_alu_instruction(pAsm
) )
2957 radeon_error("Error assembling ALU instruction\n");
2962 if(pAsm
->D
.dst
.rtype
== DST_REG_OUT
)
2966 // There is no mask for OP3 instructions, so all channels are written
2967 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
] = 0xF;
2971 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
]
2972 |= (unsigned char)pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
;
2976 //reset for next inst.
2979 pAsm
->S
[0].bits
= 0;
2980 pAsm
->S
[1].bits
= 0;
2981 pAsm
->S
[2].bits
= 0;
2982 pAsm
->is_tex
= GL_FALSE
;
2983 pAsm
->need_tex_barrier
= GL_FALSE
;
2988 GLboolean
next_ins2(r700_AssemblerBase
*pAsm
)
2990 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
2993 if( GL_FALSE
== assemble_alu_instruction2(pAsm
) )
2995 radeon_error("Error assembling ALU instruction\n");
2999 if(pAsm
->D
.dst
.rtype
== DST_REG_OUT
)
3003 // There is no mask for OP3 instructions, so all channels are written
3004 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
] = 0xF;
3008 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
]
3009 |= (unsigned char)pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
;
3013 //reset for next inst.
3016 pAsm
->S
[0].bits
= 0;
3017 pAsm
->S
[1].bits
= 0;
3018 pAsm
->S
[2].bits
= 0;
3019 pAsm
->is_tex
= GL_FALSE
;
3020 pAsm
->need_tex_barrier
= GL_FALSE
;
3028 GLboolean
next_ins_literal(r700_AssemblerBase
*pAsm
, GLfloat
* pLiteral
)
3030 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
3033 if( GL_FALSE
== assemble_alu_instruction_literal(pAsm
, pLiteral
) )
3035 radeon_error("Error assembling ALU instruction\n");
3039 //reset for next inst.
3042 pAsm
->S
[0].bits
= 0;
3043 pAsm
->S
[1].bits
= 0;
3044 pAsm
->S
[2].bits
= 0;
3045 pAsm
->is_tex
= GL_FALSE
;
3046 pAsm
->need_tex_barrier
= GL_FALSE
;
3050 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
)
3056 tmp
= gethelpr(pAsm
);
3058 // opcode tmp.x, a.x
3061 pAsm
->D
.dst
.opcode
= opcode
;
3062 pAsm
->D
.dst
.math
= 1;
3064 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3065 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3066 pAsm
->D
.dst
.reg
= tmp
;
3067 pAsm
->D
.dst
.writex
= 1;
3069 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3074 if ( GL_FALSE
== next_ins(pAsm
) )
3079 // Now replicate result to all necessary channels in destination
3080 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3082 if( GL_FALSE
== assemble_dst(pAsm
) )
3087 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3088 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3089 pAsm
->S
[0].src
.reg
= tmp
;
3091 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3092 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3094 if( GL_FALSE
== next_ins(pAsm
) )
3102 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
)
3106 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3108 if( GL_FALSE
== assemble_dst(pAsm
) )
3112 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3117 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
3118 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3120 if ( GL_FALSE
== next_ins(pAsm
) )
3128 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
)
3130 if( GL_FALSE
== checkop2(pAsm
) )
3135 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3137 if( GL_FALSE
== assemble_dst(pAsm
) )
3142 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3147 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3152 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_SUB
)
3154 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3157 if( GL_FALSE
== next_ins(pAsm
) )
3165 GLboolean
assemble_ARL(r700_AssemblerBase
*pAsm
)
3166 { /* TODO: ar values dont' persist between clauses */
3167 if( GL_FALSE
== checkop1(pAsm
) )
3172 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOVA_FLOOR
;
3173 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3174 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3175 pAsm
->D
.dst
.reg
= 0;
3176 pAsm
->D
.dst
.writex
= 0;
3177 pAsm
->D
.dst
.writey
= 0;
3178 pAsm
->D
.dst
.writez
= 0;
3179 pAsm
->D
.dst
.writew
= 0;
3181 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3186 if( GL_FALSE
== next_ins(pAsm
) )
3194 GLboolean
assemble_BAD(char *opcode_str
)
3196 radeon_error("Not yet implemented instruction (%s)\n", opcode_str
);
3200 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
)
3204 if( GL_FALSE
== checkop3(pAsm
) )
3209 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_CNDGE
;
3210 pAsm
->D
.dst
.op3
= 1;
3214 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
3216 //OP3 has no support for write mask
3217 tmp
= gethelpr(pAsm
);
3219 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3220 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3221 pAsm
->D
.dst
.reg
= tmp
;
3223 nomask_PVSDST(&(pAsm
->D
.dst
));
3227 if( GL_FALSE
== assemble_dst(pAsm
) )
3233 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3238 if( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
3243 if( GL_FALSE
== assemble_src(pAsm
, 1, 2) )
3248 if ( GL_FALSE
== next_ins(pAsm
) )
3253 if (0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
3255 if( GL_FALSE
== assemble_dst(pAsm
) )
3260 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3263 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3264 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3265 pAsm
->S
[0].src
.reg
= tmp
;
3267 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3268 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3270 if( GL_FALSE
== next_ins(pAsm
) )
3279 GLboolean
assemble_COS(r700_AssemblerBase
*pAsm
)
3281 return assemble_math_function(pAsm
, SQ_OP2_INST_COS
);
3284 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
)
3286 if( GL_FALSE
== checkop2(pAsm
) )
3291 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_DOT4
;
3293 if( GL_FALSE
== assemble_dst(pAsm
) )
3298 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3303 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3308 if(OPCODE_DP3
== pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
3310 zerocomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
3311 zerocomp_PVSSRC(&(pAsm
->S
[1].src
), 3);
3313 else if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_DPH
)
3315 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
3318 if ( GL_FALSE
== next_ins(pAsm
) )
3326 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
)
3328 if( GL_FALSE
== checkop2(pAsm
) )
3333 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3335 if( GL_FALSE
== assemble_dst(pAsm
) )
3340 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3345 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3350 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 0);
3351 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
3353 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 0);
3354 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 2);
3356 if ( GL_FALSE
== next_ins(pAsm
) )
3364 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
)
3366 return assemble_math_function(pAsm
, SQ_OP2_INST_EXP_IEEE
);
3369 GLboolean
assemble_EXP(r700_AssemblerBase
*pAsm
)
3375 tmp
= gethelpr(pAsm
);
3380 if (pAsm
->pILInst
->DstReg
.WriteMask
& 0x1) {
3381 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3383 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3384 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3385 pAsm
->D
.dst
.reg
= tmp
;
3386 pAsm
->D
.dst
.writex
= 1;
3388 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3393 if( GL_FALSE
== next_ins(pAsm
) )
3398 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3399 pAsm
->D
.dst
.math
= 1;
3401 if( GL_FALSE
== assemble_dst(pAsm
) )
3406 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3408 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3409 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3410 pAsm
->S
[0].src
.reg
= tmp
;
3412 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3413 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3415 if( GL_FALSE
== next_ins(pAsm
) )
3423 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 1) & 0x1) {
3424 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
3426 if( GL_FALSE
== assemble_dst(pAsm
) )
3431 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3436 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3438 if( GL_FALSE
== next_ins(pAsm
) )
3446 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 2) & 0x1) {
3447 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3448 pAsm
->D
.dst
.math
= 1;
3450 if( GL_FALSE
== assemble_dst(pAsm
) )
3455 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3460 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
3462 if( GL_FALSE
== next_ins(pAsm
) )
3470 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 3) & 0x1) {
3471 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3473 if( GL_FALSE
== assemble_dst(pAsm
) )
3478 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
3480 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3481 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3482 pAsm
->S
[0].src
.reg
= tmp
;
3484 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
3485 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3487 if( GL_FALSE
== next_ins(pAsm
) )
3496 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
)
3500 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3502 if ( GL_FALSE
== assemble_dst(pAsm
) )
3507 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3512 if ( GL_FALSE
== next_ins(pAsm
) )
3520 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
)
3522 return assemble_math_function(pAsm
, SQ_OP2_INST_FLT_TO_INT
);
3525 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
)
3529 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
3531 if ( GL_FALSE
== assemble_dst(pAsm
) )
3536 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3541 if ( GL_FALSE
== next_ins(pAsm
) )
3549 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
, GLuint opcode
)
3553 pAsm
->D
.dst
.opcode
= opcode
;
3554 pAsm
->D
.dst
.math
= 1;
3556 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3557 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3558 pAsm
->D
.dst
.reg
= 0;
3559 pAsm
->D
.dst
.writex
= 0;
3560 pAsm
->D
.dst
.writey
= 0;
3561 pAsm
->D
.dst
.writez
= 0;
3562 pAsm
->D
.dst
.writew
= 0;
3564 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3569 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3574 if ( GL_FALSE
== next_ins2(pAsm
) )
3579 /* Doc says KILL has to be last(end) ALU clause */
3580 pAsm
->pR700Shader
->killIsUsed
= GL_TRUE
;
3581 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
3586 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
)
3588 return assemble_math_function(pAsm
, SQ_OP2_INST_LOG_IEEE
);
3591 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
)
3595 if( GL_FALSE
== checkop3(pAsm
) )
3600 tmp
= gethelpr(pAsm
);
3602 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3604 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3605 pAsm
->D
.dst
.reg
= tmp
;
3606 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3607 nomask_PVSDST(&(pAsm
->D
.dst
));
3610 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
3615 if ( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
3620 neg_PVSSRC(&(pAsm
->S
[1].src
));
3622 if( GL_FALSE
== next_ins(pAsm
) )
3627 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3628 pAsm
->D
.dst
.op3
= 1;
3630 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3631 pAsm
->D
.dst
.reg
= tmp
;
3632 nomask_PVSDST(&(pAsm
->D
.dst
));
3633 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3635 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3636 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3637 pAsm
->S
[0].src
.reg
= tmp
;
3638 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3641 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3645 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
3650 if( GL_FALSE
== next_ins(pAsm
) )
3655 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3657 if( GL_FALSE
== assemble_dst(pAsm
) )
3662 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3663 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3664 pAsm
->S
[0].src
.reg
= tmp
;
3665 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3667 if( GL_FALSE
== next_ins(pAsm
) )
3675 GLboolean
assemble_LOG(r700_AssemblerBase
*pAsm
)
3677 BITS tmp1
, tmp2
, tmp3
;
3681 tmp1
= gethelpr(pAsm
);
3682 tmp2
= gethelpr(pAsm
);
3683 tmp3
= gethelpr(pAsm
);
3685 // FIXME: The hardware can do fabs() directly on input
3686 // elements, but the compiler doesn't have the
3687 // capability to use that.
3689 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3691 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3693 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3694 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3695 pAsm
->D
.dst
.reg
= tmp1
;
3696 pAsm
->D
.dst
.writex
= 1;
3698 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3703 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
3704 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3706 if ( GL_FALSE
== next_ins(pAsm
) )
3713 // LG2 tmp2.x, tmp1.x
3714 // FLOOR tmp3.x, tmp2.x
3715 // MOV dst.x, tmp3.x
3716 // ADD tmp3.x, tmp2.x, -tmp3.x
3717 // EX2 dst.y, tmp3.x
3718 // MOV dst.z, tmp2.x
3721 // LG2 tmp2.x, tmp1.x
3722 // FLOOR tmp3.x, tmp2.x
3724 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
3725 pAsm
->D
.dst
.math
= 1;
3727 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3728 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3729 pAsm
->D
.dst
.reg
= tmp2
;
3730 pAsm
->D
.dst
.writex
= 1;
3732 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3733 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3734 pAsm
->S
[0].src
.reg
= tmp1
;
3736 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3737 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3739 if( GL_FALSE
== next_ins(pAsm
) )
3744 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3746 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3747 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3748 pAsm
->D
.dst
.reg
= tmp3
;
3749 pAsm
->D
.dst
.writex
= 1;
3751 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3752 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3753 pAsm
->S
[0].src
.reg
= tmp2
;
3755 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3756 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3758 if( GL_FALSE
== next_ins(pAsm
) )
3763 // MOV dst.x, tmp3.x
3765 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3767 if( GL_FALSE
== assemble_dst(pAsm
) )
3772 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3774 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3775 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3776 pAsm
->S
[0].src
.reg
= tmp3
;
3778 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3779 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3781 if( GL_FALSE
== next_ins(pAsm
) )
3786 // ADD tmp3.x, tmp2.x, -tmp3.x
3787 // EX2 dst.y, tmp3.x
3789 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3791 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3792 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3793 pAsm
->D
.dst
.reg
= tmp3
;
3794 pAsm
->D
.dst
.writex
= 1;
3796 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3797 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3798 pAsm
->S
[0].src
.reg
= tmp2
;
3800 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3801 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3803 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3804 pAsm
->S
[1].src
.rtype
= DST_REG_TEMPORARY
;
3805 pAsm
->S
[1].src
.reg
= tmp3
;
3807 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
3808 neg_PVSSRC(&(pAsm
->S
[1].src
));
3810 if( GL_FALSE
== next_ins(pAsm
) )
3815 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3816 pAsm
->D
.dst
.math
= 1;
3818 if( GL_FALSE
== assemble_dst(pAsm
) )
3823 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3825 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3826 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3827 pAsm
->S
[0].src
.reg
= tmp3
;
3829 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3830 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3832 if( GL_FALSE
== next_ins(pAsm
) )
3837 // MOV dst.z, tmp2.x
3839 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3841 if( GL_FALSE
== assemble_dst(pAsm
) )
3846 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
3848 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3849 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3850 pAsm
->S
[0].src
.reg
= tmp2
;
3852 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3853 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3855 if( GL_FALSE
== next_ins(pAsm
) )
3862 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3864 if( GL_FALSE
== assemble_dst(pAsm
) )
3869 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
3871 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3872 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3873 pAsm
->S
[0].src
.reg
= tmp1
;
3875 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
3876 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3878 if( GL_FALSE
== next_ins(pAsm
) )
3886 GLboolean
assemble_MAD(struct r700_AssemblerBase
*pAsm
)
3889 GLboolean bReplaceDst
= GL_FALSE
;
3890 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
3892 if( GL_FALSE
== checkop3(pAsm
) )
3897 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3898 pAsm
->D
.dst
.op3
= 1;
3902 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
3903 { /* TODO : more investigation on MAD src and dst using same register */
3904 for(ii
=0; ii
<3; ii
++)
3906 if( (PROGRAM_TEMPORARY
== pILInst
->SrcReg
[ii
].File
)
3907 && (pILInst
->DstReg
.Index
== pILInst
->SrcReg
[ii
].Index
) )
3909 bReplaceDst
= GL_TRUE
;
3914 if(0xF != pILInst
->DstReg
.WriteMask
)
3915 { /* OP3 has no support for write mask */
3916 bReplaceDst
= GL_TRUE
;
3919 if(GL_TRUE
== bReplaceDst
)
3921 tmp
= gethelpr(pAsm
);
3923 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3924 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3925 pAsm
->D
.dst
.reg
= tmp
;
3927 nomask_PVSDST(&(pAsm
->D
.dst
));
3931 if( GL_FALSE
== assemble_dst(pAsm
) )
3937 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3942 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3947 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
3952 if ( GL_FALSE
== next_ins(pAsm
) )
3957 if (GL_TRUE
== bReplaceDst
)
3959 if( GL_FALSE
== assemble_dst(pAsm
) )
3964 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3967 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3968 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3969 pAsm
->S
[0].src
.reg
= tmp
;
3971 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3972 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3974 if( GL_FALSE
== next_ins(pAsm
) )
3984 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
)
3986 unsigned int dstReg
;
3987 unsigned int dstType
;
3988 unsigned int srcReg
;
3989 unsigned int srcType
;
3991 int tmp
= gethelpr(pAsm
);
3993 if( GL_FALSE
== assemble_dst(pAsm
) )
3997 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4001 dstReg
= pAsm
->D
.dst
.reg
;
4002 dstType
= pAsm
->D
.dst
.rtype
;
4003 srcReg
= pAsm
->S
[0].src
.reg
;
4004 srcType
= pAsm
->S
[0].src
.rtype
;
4006 /* dst.xw, <- 1.0 */
4007 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4008 pAsm
->D
.dst
.rtype
= dstType
;
4009 pAsm
->D
.dst
.reg
= dstReg
;
4010 pAsm
->D
.dst
.writex
= 1;
4011 pAsm
->D
.dst
.writey
= 0;
4012 pAsm
->D
.dst
.writez
= 0;
4013 pAsm
->D
.dst
.writew
= 1;
4014 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4015 pAsm
->S
[0].src
.reg
= tmp
;
4016 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4017 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4018 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_1
;
4019 pAsm
->S
[0].src
.swizzley
= SQ_SEL_1
;
4020 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_1
;
4021 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_1
;
4022 if( GL_FALSE
== next_ins(pAsm
) )
4027 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4032 /* dst.y = max(src.x, 0.0) */
4033 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
4034 pAsm
->D
.dst
.rtype
= dstType
;
4035 pAsm
->D
.dst
.reg
= dstReg
;
4036 pAsm
->D
.dst
.writex
= 0;
4037 pAsm
->D
.dst
.writey
= 1;
4038 pAsm
->D
.dst
.writez
= 0;
4039 pAsm
->D
.dst
.writew
= 0;
4040 pAsm
->S
[0].src
.rtype
= srcType
;
4041 pAsm
->S
[0].src
.reg
= srcReg
;
4042 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4043 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
4044 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4045 pAsm
->S
[1].src
.reg
= tmp
;
4046 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4047 noneg_PVSSRC(&(pAsm
->S
[1].src
));
4048 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
4049 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
4050 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
4051 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
4052 if( GL_FALSE
== next_ins(pAsm
) )
4057 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4062 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
);
4064 /* dst.z = log(src.y) */
4065 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_CLAMPED
;
4066 pAsm
->D
.dst
.math
= 1;
4067 pAsm
->D
.dst
.rtype
= dstType
;
4068 pAsm
->D
.dst
.reg
= dstReg
;
4069 pAsm
->D
.dst
.writex
= 0;
4070 pAsm
->D
.dst
.writey
= 0;
4071 pAsm
->D
.dst
.writez
= 1;
4072 pAsm
->D
.dst
.writew
= 0;
4073 pAsm
->S
[0].src
.rtype
= srcType
;
4074 pAsm
->S
[0].src
.reg
= srcReg
;
4075 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4076 if( GL_FALSE
== next_ins(pAsm
) )
4081 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4086 if( GL_FALSE
== assemble_src(pAsm
, 0, 2) )
4091 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
4093 swizzleagain_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
4095 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
4096 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MUL_LIT
;
4097 pAsm
->D
.dst
.math
= 1;
4098 pAsm
->D
.dst
.op3
= 1;
4099 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4100 pAsm
->D
.dst
.reg
= tmp
;
4101 pAsm
->D
.dst
.writex
= 1;
4102 pAsm
->D
.dst
.writey
= 0;
4103 pAsm
->D
.dst
.writez
= 0;
4104 pAsm
->D
.dst
.writew
= 0;
4106 pAsm
->S
[0].src
.rtype
= srcType
;
4107 pAsm
->S
[0].src
.reg
= srcReg
;
4108 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4110 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4111 pAsm
->S
[1].src
.reg
= dstReg
;
4112 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4113 noneg_PVSSRC(&(pAsm
->S
[1].src
));
4114 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_Z
;
4115 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Z
;
4116 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
4117 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_Z
;
4119 pAsm
->S
[2].src
.rtype
= srcType
;
4120 pAsm
->S
[2].src
.reg
= srcReg
;
4121 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
4123 if( GL_FALSE
== next_ins(pAsm
) )
4128 /* dst.z = exp(tmp.x) */
4129 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
4130 pAsm
->D
.dst
.math
= 1;
4131 pAsm
->D
.dst
.rtype
= dstType
;
4132 pAsm
->D
.dst
.reg
= dstReg
;
4133 pAsm
->D
.dst
.writex
= 0;
4134 pAsm
->D
.dst
.writey
= 0;
4135 pAsm
->D
.dst
.writez
= 1;
4136 pAsm
->D
.dst
.writew
= 0;
4138 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4139 pAsm
->S
[0].src
.reg
= tmp
;
4140 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4141 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4142 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
4143 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
4144 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_X
;
4145 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_X
;
4147 if( GL_FALSE
== next_ins(pAsm
) )
4155 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
)
4157 if( GL_FALSE
== checkop2(pAsm
) )
4162 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
4164 if( GL_FALSE
== assemble_dst(pAsm
) )
4169 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4174 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4179 if( GL_FALSE
== next_ins(pAsm
) )
4187 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
)
4189 if( GL_FALSE
== checkop2(pAsm
) )
4194 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MIN
;
4196 if( GL_FALSE
== assemble_dst(pAsm
) )
4201 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4206 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4211 if( GL_FALSE
== next_ins(pAsm
) )
4219 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
)
4223 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4225 if (GL_FALSE
== assemble_dst(pAsm
))
4230 if (GL_FALSE
== assemble_src(pAsm
, 0, -1))
4235 if ( GL_FALSE
== next_ins(pAsm
) )
4243 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
)
4245 if( GL_FALSE
== checkop2(pAsm
) )
4250 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4252 if( GL_FALSE
== assemble_dst(pAsm
) )
4257 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4262 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4267 if( GL_FALSE
== next_ins(pAsm
) )
4275 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
)
4281 tmp
= gethelpr(pAsm
);
4283 // LG2 tmp.x, a.swizzle
4284 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
4285 pAsm
->D
.dst
.math
= 1;
4287 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4288 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4289 pAsm
->D
.dst
.reg
= tmp
;
4290 nomask_PVSDST(&(pAsm
->D
.dst
));
4292 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4297 if( GL_FALSE
== next_ins(pAsm
) )
4302 // MUL tmp.x, tmp.x, b.swizzle
4303 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4305 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4306 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4307 pAsm
->D
.dst
.reg
= tmp
;
4308 nomask_PVSDST(&(pAsm
->D
.dst
));
4310 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4311 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4312 pAsm
->S
[0].src
.reg
= tmp
;
4313 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4314 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4316 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4321 if( GL_FALSE
== next_ins(pAsm
) )
4326 // EX2 dst.mask, tmp.x
4328 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
4329 pAsm
->D
.dst
.math
= 1;
4331 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4332 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4333 pAsm
->D
.dst
.reg
= tmp
;
4334 nomask_PVSDST(&(pAsm
->D
.dst
));
4336 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4337 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4338 pAsm
->S
[0].src
.reg
= tmp
;
4339 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4340 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4342 if( GL_FALSE
== next_ins(pAsm
) )
4347 // Now replicate result to all necessary channels in destination
4348 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4350 if( GL_FALSE
== assemble_dst(pAsm
) )
4355 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4356 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4357 pAsm
->S
[0].src
.reg
= tmp
;
4359 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4360 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4362 if( GL_FALSE
== next_ins(pAsm
) )
4370 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
)
4372 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIP_IEEE
);
4375 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
)
4377 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIPSQRT_IEEE
);
4380 GLboolean
assemble_SIN(r700_AssemblerBase
*pAsm
)
4382 return assemble_math_function(pAsm
, SQ_OP2_INST_SIN
);
4385 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
)
4391 tmp
= gethelpr(pAsm
);
4394 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_COS
;
4395 pAsm
->D
.dst
.math
= 1;
4397 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4398 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4399 pAsm
->D
.dst
.reg
= tmp
;
4400 pAsm
->D
.dst
.writex
= 1;
4402 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4407 if ( GL_FALSE
== next_ins(pAsm
) )
4413 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SIN
;
4414 pAsm
->D
.dst
.math
= 1;
4416 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4417 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4418 pAsm
->D
.dst
.reg
= tmp
;
4419 pAsm
->D
.dst
.writey
= 1;
4421 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4426 if( GL_FALSE
== next_ins(pAsm
) )
4431 // MOV dst.mask, tmp
4432 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4434 if( GL_FALSE
== assemble_dst(pAsm
) )
4439 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4440 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4441 pAsm
->S
[0].src
.reg
= tmp
;
4443 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4444 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_0
;
4445 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_0
;
4447 if ( GL_FALSE
== next_ins(pAsm
) )
4455 GLboolean
assemble_LOGIC(r700_AssemblerBase
*pAsm
, BITS opcode
)
4457 if( GL_FALSE
== checkop2(pAsm
) )
4462 pAsm
->D
.dst
.opcode
= opcode
;
4463 pAsm
->D
.dst
.math
= 1;
4465 if( GL_FALSE
== assemble_dst(pAsm
) )
4470 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4475 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4480 if( GL_FALSE
== next_ins(pAsm
) )
4488 GLboolean
assemble_LOGIC_PRED(r700_AssemblerBase
*pAsm
, BITS opcode
)
4490 if( GL_FALSE
== checkop2(pAsm
) )
4495 pAsm
->D
.dst
.opcode
= opcode
;
4496 pAsm
->D
.dst
.math
= 1;
4497 pAsm
->D
.dst
.predicated
= 1;
4498 pAsm
->D2
.dst2
.SaturateMode
= pAsm
->pILInst
[pAsm
->uiCurInst
].SaturateMode
;
4500 if( GL_FALSE
== assemble_dst(pAsm
) )
4505 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4510 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4515 if( GL_FALSE
== next_ins2(pAsm
) )
4523 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
)
4525 if( GL_FALSE
== checkop2(pAsm
) )
4530 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGE
;
4532 if( GL_FALSE
== assemble_dst(pAsm
) )
4537 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4542 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4547 if( GL_FALSE
== next_ins(pAsm
) )
4555 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
)
4557 if( GL_FALSE
== checkop2(pAsm
) )
4562 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGT
;
4564 if( GL_FALSE
== assemble_dst(pAsm
) )
4569 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
4574 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
4579 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_STP: handler for the STP opcode, taking the assembler state and
 * returning GLboolean.
 * NOTE(review): only the signature is visible in this extract; the body is
 * not shown, so what (if anything) it emits must be confirmed in the full
 * file.
 */
4587 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
)
/*
 * assemble_TEX: assemble a texture-sampling instruction for the current IL
 * instruction, pAsm->pILInst[pAsm->uiCurInst].
 *
 * Steps visible below:
 *  - if source 0 lives in a constant / local-param / env-param / state-var
 *    register file it is first passed through mov_temp(pAsm, 0) and a texture
 *    barrier is requested;
 *  - OPCODE_TXP: a RECIP_IEEE of the source's W component is written to a
 *    helper register (writew set), then a MUL of the source by helper.w is
 *    written to helper x/y/z; the helper is recorded in aArgSubst[1];
 *  - TEXTURE_CUBE_INDEX targets: a CUBE, MAX, RECIP_IEEE, MULADD, ADD and MOV
 *    setup sequence builds the coordinate in a helper register, which is
 *    recorded in aArgSubst[1];
 *  - finally SQ_TEX_INST_SAMPLE is set up (texture unit id in S[1].src.reg)
 *    and passed to next_ins().
 *
 * Returns GLboolean; every guarded call is compared against GL_FALSE.
 * NOTE(review): this extract omits the short source lines (braces, returns,
 * case labels, short calls), so control flow is only partly visible here.
 */
4592 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
)
4594 GLboolean src_const
;
4595 GLboolean need_barrier
= GL_FALSE
;
/* Classify source 0 by register file: constant-like files set src_const. */
4599 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].SrcReg
[0].File
)
4601 case PROGRAM_CONSTANT
:
4602 case PROGRAM_LOCAL_PARAM
:
4603 case PROGRAM_ENV_PARAM
:
4604 case PROGRAM_STATE_VAR
:
4605 src_const
= GL_TRUE
;
4607 case PROGRAM_TEMPORARY
:
4610 src_const
= GL_FALSE
;
/* Constant-file sources are moved with mov_temp() and need a barrier. */
4614 if (GL_TRUE
== src_const
)
4616 if ( GL_FALSE
== mov_temp(pAsm
, 0) )
4618 need_barrier
= GL_TRUE
;
/* Opcode validation; the two messages are the only visible branches. */
4621 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
4626 radeon_error("do not support TXB yet\n");
4632 radeon_error("Internal error: bad texture op (not TEX)\n");
/* TXP: helper.w = RECIP_IEEE(src.w), then helper.xyz = src * helper.w. */
4637 if (pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
4639 GLuint tmp
= gethelpr(pAsm
);
4640 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
4641 pAsm
->D
.dst
.math
= 1;
4642 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4643 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4644 pAsm
->D
.dst
.reg
= tmp
;
4645 pAsm
->D
.dst
.writew
= 1;
4647 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4651 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
4652 if( GL_FALSE
== next_ins(pAsm
) )
4657 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4658 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4659 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4660 pAsm
->D
.dst
.reg
= tmp
;
4661 pAsm
->D
.dst
.writex
= 1;
4662 pAsm
->D
.dst
.writey
= 1;
4663 pAsm
->D
.dst
.writez
= 1;
4664 pAsm
->D
.dst
.writew
= 0;
4666 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4670 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4671 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4672 pAsm
->S
[1].src
.reg
= tmp
;
4673 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_W
);
4675 if( GL_FALSE
== next_ins(pAsm
) )
4680 pAsm
->aArgSubst
[1] = tmp
;
4681 need_barrier
= GL_TRUE
;
/* Cube maps: build the coordinate in two helper registers. */
4684 if (pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
4686 GLuint tmp1
= gethelpr(pAsm
);
4687 GLuint tmp2
= gethelpr(pAsm
);
4689 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
4690 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_CUBE
;
4691 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4692 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4693 pAsm
->D
.dst
.reg
= tmp1
;
4694 nomask_PVSDST(&(pAsm
->D
.dst
));
4696 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4701 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
4706 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
);
4707 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_X
, SQ_SEL_Z
, SQ_SEL_Z
);
4709 if( GL_FALSE
== next_ins(pAsm
) )
4714 /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently
4715 * have to do explicit instruction
4717 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
4718 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4719 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4720 pAsm
->D
.dst
.reg
= tmp1
;
4721 pAsm
->D
.dst
.writez
= 1;
4723 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4724 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4725 pAsm
->S
[0].src
.reg
= tmp1
;
4726 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4727 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
4728 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
4732 /* tmp1.z = RCP_e(|tmp1.z|) */
4733 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
4734 pAsm
->D
.dst
.math
= 1;
4735 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4736 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4737 pAsm
->D
.dst
.reg
= tmp1
;
4738 pAsm
->D
.dst
.writez
= 1;
4740 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4741 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4742 pAsm
->S
[0].src
.reg
= tmp1
;
4743 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Z
;
4747 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4748 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4749 * muladd has no writemask, have to use another temp
4750 * also no support for imm constants, so add 1 here
4752 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4753 pAsm
->D
.dst
.op3
= 1;
4754 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4755 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4756 pAsm
->D
.dst
.reg
= tmp2
;
4758 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4759 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4760 pAsm
->S
[0].src
.reg
= tmp1
;
4761 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4762 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4763 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4764 pAsm
->S
[1].src
.reg
= tmp1
;
4765 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
);
4766 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
4767 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
4768 pAsm
->S
[2].src
.reg
= tmp1
;
4769 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_1
);
4773 /* ADD the remaining .5 */
4774 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
4775 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4776 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4777 pAsm
->D
.dst
.reg
= tmp2
;
4778 pAsm
->D
.dst
.writex
= 1;
4779 pAsm
->D
.dst
.writey
= 1;
4780 pAsm
->D
.dst
.writez
= 0;
4781 pAsm
->D
.dst
.writew
= 0;
4783 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4784 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4785 pAsm
->S
[0].src
.reg
= tmp2
;
4786 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4787 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4788 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4789 pAsm
->S
[1].src
.reg
= 252; // SQ_ALU_SRC_0_5
4790 noswizzle_PVSSRC(&(pAsm
->S
[1].src
));
4794 /* tmp1.xy = temp2.xy */
4795 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4796 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4797 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4798 pAsm
->D
.dst
.reg
= tmp1
;
4799 pAsm
->D
.dst
.writex
= 1;
4800 pAsm
->D
.dst
.writey
= 1;
4801 pAsm
->D
.dst
.writez
= 0;
4802 pAsm
->D
.dst
.writew
= 0;
4804 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4805 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4806 pAsm
->S
[0].src
.reg
= tmp2
;
4807 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4810 pAsm
->aArgSubst
[1] = tmp1
;
4811 need_barrier
= GL_TRUE
;
/* The sample instruction itself. */
4815 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE
;
4816 pAsm
->is_tex
= GL_TRUE
;
4817 if ( GL_TRUE
== need_barrier
)
4819 pAsm
->need_tex_barrier
= GL_TRUE
;
4821 // Set src1 to tex unit id
4822 pAsm
->S
[1].src
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcUnit
;
4823 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4825 //No sw info from mesa compiler, so hard code here.
4826 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
4827 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
4828 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
4829 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
4831 if( GL_FALSE
== tex_dst(pAsm
) )
4836 if( GL_FALSE
== tex_src(pAsm
) )
4841 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
4843 /* hopefully did swizzles before */
4844 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4847 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
4849 /* SAMPLE dst, tmp.yxwy, CUBE */
4850 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Y
;
4851 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
4852 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_W
;
4853 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_Y
;
4856 if ( GL_FALSE
== next_ins(pAsm
) )
4864 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
)
4868 if( GL_FALSE
== checkop2(pAsm
) )
4873 tmp
= gethelpr(pAsm
);
4875 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4877 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4878 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4879 pAsm
->D
.dst
.reg
= tmp
;
4880 nomask_PVSDST(&(pAsm
->D
.dst
));
4882 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4887 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4892 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
4893 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
4895 if( GL_FALSE
== next_ins(pAsm
) )
4900 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4901 pAsm
->D
.dst
.op3
= 1;
4903 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
4905 tmp
= gethelpr(pAsm
);
4907 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4908 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4909 pAsm
->D
.dst
.reg
= tmp
;
4911 nomask_PVSDST(&(pAsm
->D
.dst
));
4915 if( GL_FALSE
== assemble_dst(pAsm
) )
4921 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4926 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4931 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
4932 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
4934 // result1 + (neg) result0
4935 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
),ADDR_ABSOLUTE
);
4936 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
4937 pAsm
->S
[2].src
.reg
= tmp
;
4939 neg_PVSSRC(&(pAsm
->S
[2].src
));
4940 noswizzle_PVSSRC(&(pAsm
->S
[2].src
));
4942 if( GL_FALSE
== next_ins(pAsm
) )
4948 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
4950 if( GL_FALSE
== assemble_dst(pAsm
) )
4955 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4957 // Use tmp as source
4958 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4959 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4960 pAsm
->S
[0].src
.reg
= tmp
;
4962 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4963 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4965 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_EXPORT: export handler taking the assembler state and returning
 * GLboolean.
 * NOTE(review): only the signature is visible in this extract; the body is
 * not shown, so its behavior must be confirmed in the full file.
 */
4974 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
)
/*
 * decreaseCurrent: lower the `current` stack-depth counter of the active
 * call-stack entry, pAsm->CALLSTACK[pAsm->CALLSP].
 *
 * Four alternatives are visible, subtracting 1, 4, 4 and 1 respectively.
 * NOTE(review): the lines that select between them (presumably a switch on
 * uReason with one case per flow-control kind) are not visible in this
 * extract -- confirm which reason maps to which amount.
 */
4979 static inline void decreaseCurrent(r700_AssemblerBase
*pAsm
, GLuint uReason
)
4984 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
--;
4987 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 4;
4990 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 4;
4993 /* TODO : for 16 vp asic, should -= 2; */
4994 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 1;
/*
 * checkStackDepth: keep the `max` stack-depth watermark of the active
 * call-stack entry, pAsm->CALLSTACK[pAsm->CALLSP], up to date.
 *
 * With bCheckMaxOnly == GL_TRUE the visible code only raises `max` to
 * current + 1 or current + 4 when that exceeds it; `current` is not
 * modified in that part.  Otherwise `current` is raised (by 1, 4, 4 or 1)
 * and `max` is then raised to `current` if it was exceeded.
 * NOTE(review): the selecting lines (presumably switches on uReason) and the
 * statement separating the two halves are not visible in this extract --
 * confirm the reason-to-amount mapping in the full file.
 */
4999 static inline void checkStackDepth(r700_AssemblerBase
*pAsm
, GLuint uReason
, GLboolean bCheckMaxOnly
)
5001 if(GL_TRUE
== bCheckMaxOnly
)
/* Peek: would one more entry exceed the watermark? */
5006 if((pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 1)
5007 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5009 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
5010 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 1;
/* Peek: would four more entries exceed the watermark? */
5014 if((pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 4)
5015 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5017 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
5018 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 4;
/* Actual push: raise `current` by the amount for the given reason. */
5028 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
++;
5031 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 4;
5034 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 4;
5037 /* TODO : for 16 vp asic, should += 2; */
5038 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 1;
/* Track the deepest level reached. */
5042 if(pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
5043 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5045 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
5046 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
5050 GLboolean
jumpToOffest(r700_AssemblerBase
*pAsm
, GLuint pops
, GLint offset
)
5052 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5057 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= pops
;
5058 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5059 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5061 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5062 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5063 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_JUMP
;
5064 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5066 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5068 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ offset
;
5073 GLboolean
pops(r700_AssemblerBase
*pAsm
, GLuint pops
)
5075 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5080 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= pops
;
5081 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5082 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5084 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5085 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5086 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
5088 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5090 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5091 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
/*
 * assemble_IF: open an IF construct.
 *
 * Adds a CF instruction set up as SQ_CF_INST_JUMP (pop_count 1 when there is
 * no ELSE, 0 when there is one) and records an FC_IF entry in
 * pAsm->fc_stack[pAsm->FCSP] whose `first` pointer is that instruction; the
 * jump address is not set here.  Finishes by accounting for the push with
 * checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE).
 *
 * Parameters: pAsm - assembler state; bHasElse - GL_TRUE when an ELSE
 * belongs to this IF.  Returns GLboolean.
 * NOTE(review): this extract omits short source lines; in particular any
 * statement advancing pAsm->FCSP before the fc_stack writes is not visible
 * -- confirm in the full file.
 */
5096 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
, GLboolean bHasElse
)
5098 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5103 if(GL_TRUE
!= bHasElse
)
5105 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5109 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5111 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5112 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5114 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5115 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5116 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_JUMP
;
5117 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5119 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Record the IF on the flow-control stack; `mid` stays NULL until an ELSE. */
5122 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_IF
;
5123 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= NULL
;
5124 pAsm
->fc_stack
[pAsm
->FCSP
].midLen
= 0;
5125 pAsm
->fc_stack
[pAsm
->FCSP
].first
= pAsm
->cf_current_cf_clause_ptr
;
/* Only compiled when USE_CF_FOR_POP_AFTER is not defined. */
5127 #ifndef USE_CF_FOR_POP_AFTER
5128 if(GL_TRUE
!= bHasElse
)
5130 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_POP_AFTER
;
5132 #endif /* USE_CF_FOR_POP_AFTER */
5134 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_FALSE
);
5139 GLboolean
assemble_ELSE(r700_AssemblerBase
*pAsm
)
5141 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5146 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1; ///
5147 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5148 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5150 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5151 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5152 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ELSE
;
5153 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5155 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5157 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc( (void *)pAsm
->fc_stack
[pAsm
->FCSP
].mid
,
5159 sizeof(R700ControlFlowGenericClause
*) );
5160 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[0] = pAsm
->cf_current_cf_clause_ptr
;
5161 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
5163 #ifndef USE_CF_FOR_POP_AFTER
5164 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_POP_AFTER
;
5165 #endif /* USE_CF_FOR_POP_AFTER */
5167 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
- 1;
/*
 * assemble_ENDIF: close the innermost IF.
 *
 * Visible steps: reset alu_x_opcode to SQ_CF_INST_ALU; patch the pending
 * jump address -- of `first` when there was no ELSE (mid == NULL), otherwise
 * of mid[0] -- to the current number of CF instructions in the active list;
 * free `mid`; report an error if the stack entry is not FC_IF; undo the
 * stack-depth accounting with decreaseCurrent(pAsm, FC_PUSH_VPM).
 *
 * Returns GLboolean.
 * NOTE(review): in the visible order the FC_IF type check happens after the
 * entry has already been patched and freed.  The statement inside the
 * USE_CF_FOR_POP_AFTER section and any statement popping pAsm->FCSP are not
 * visible in this extract -- confirm in the full file.
 */
5172 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
)
5174 #ifdef USE_CF_FOR_POP_AFTER
5176 #endif /* USE_CF_FOR_POP_AFTER */
5178 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
5180 if(NULL
== pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
5182 /* no else in between */
5183 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
;
/* With an ELSE present, its clause (mid[0]) is the one patched instead. */
5187 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[0]->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
;
5190 if(NULL
!= pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
5192 FREE(pAsm
->fc_stack
[pAsm
->FCSP
].mid
);
5195 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_IF
)
5197 radeon_error("if/endif in shader code are not paired. \n");
5203 decreaseCurrent(pAsm
, FC_PUSH_VPM
);
/*
 * assemble_BGNLOOP: open a loop.
 *
 * Adds a CF instruction set up as SQ_CF_INST_LOOP_START_NO_AL (pop_count 0,
 * barrier set) and records an FC_LOOP entry in pAsm->fc_stack[pAsm->FCSP]
 * with an empty `mid` list and `first` pointing at that instruction; the
 * instruction's address is not set here.  Finishes with
 * checkStackDepth(pAsm, FC_LOOP, GL_FALSE).
 *
 * Returns GLboolean.
 * NOTE(review): this extract omits short source lines; any statement
 * advancing pAsm->FCSP before the fc_stack writes is not visible -- confirm
 * in the full file.
 */
5208 GLboolean
assemble_BGNLOOP(r700_AssemblerBase
*pAsm
)
5210 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5216 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5217 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5218 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5220 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5221 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5222 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_START_NO_AL
;
5223 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5225 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Record the loop on the flow-control stack with no break/continue yet. */
5228 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_LOOP
;
5229 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= NULL
;
5230 pAsm
->fc_stack
[pAsm
->FCSP
].unNumMid
= 0;
5231 pAsm
->fc_stack
[pAsm
->FCSP
].midLen
= 0;
5232 pAsm
->fc_stack
[pAsm
->FCSP
].first
= pAsm
->cf_current_cf_clause_ptr
;
5234 checkStackDepth(pAsm
, FC_LOOP
, GL_FALSE
);
5239 GLboolean
assemble_BRK(r700_AssemblerBase
*pAsm
)
5241 #ifdef USE_CF_FOR_CONTINUE_BREAK
5242 unsigned int unFCSP
;
5243 for(unFCSP
=pAsm
->FCSP
; unFCSP
>0; unFCSP
--)
5245 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5252 radeon_error("Break is not inside loop/endloop pair.\n");
5256 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5262 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5263 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5264 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5266 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5267 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5268 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_BREAK
;
5270 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5272 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5274 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
5275 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
5276 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
5277 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
5278 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
5279 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
5281 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5286 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5287 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5288 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5290 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5291 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5292 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
5294 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5296 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5297 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
5299 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
5301 #endif //USE_CF_FOR_CONTINUE_BREAK
5305 GLboolean
assemble_CONT(r700_AssemblerBase
*pAsm
)
5307 #ifdef USE_CF_FOR_CONTINUE_BREAK
5308 unsigned int unFCSP
;
5309 for(unFCSP
=pAsm
->FCSP
; unFCSP
>0; unFCSP
--)
5311 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5318 radeon_error("Continue is not inside loop/endloop pair.\n");
5322 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5328 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5329 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5330 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5332 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5333 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5334 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_CONTINUE
;
5336 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5338 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5340 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
5341 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
5342 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
5343 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
5344 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
5345 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
5347 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5352 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5353 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5354 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5356 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5357 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5358 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
5360 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5362 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5363 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
5365 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
5367 #endif /* USE_CF_FOR_CONTINUE_BREAK */
/*
 * assemble_ENDLOOP: close the innermost loop.
 *
 * Visible steps:
 *  - add a CF instruction set up as SQ_CF_INST_LOOP_END whose address is the
 *    instruction after the loop start (first->m_uIndex + 1), and point the
 *    loop start at the instruction after this one;
 *  - with USE_CF_FOR_CONTINUE_BREAK, patch every recorded break/continue
 *    clause in `mid` to this LOOP_END's index, then free `mid`;
 *  - report an error if the stack entry is not FC_LOOP;
 *  - if HAS_CURRENT_LOOPRET is set in unCFflags, walk the outer flow-control
 *    entries of the current subroutine: an enclosing loop gets
 *    breakLoopOnFlag(); if the walk reaches FCSP_BeforeEntry, returnOnFlag()
 *    is emitted and the flag is cleared;
 *  - undo the depth accounting with decreaseCurrent(pAsm, FC_LOOP).
 *
 * Returns GLboolean.
 * NOTE(review): this extract omits short source lines (declarations of i,
 * unFCSP and unIF, the bodies of the FC_IF branch, breaks, and any FCSP
 * pop), so the loop-exit details must be confirmed in the full file.  The
 * FC_LOOP type check also happens after the entry was patched and freed.
 */
5372 GLboolean
assemble_ENDLOOP(r700_AssemblerBase
*pAsm
)
5376 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5382 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5383 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5384 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5386 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5387 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5388 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_END
;
5389 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5391 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Cross-link LOOP_END and the loop start recorded in `first`. */
5393 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_uIndex
+ 1;
5394 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
5396 #ifdef USE_CF_FOR_CONTINUE_BREAK
/* Every recorded break/continue clause targets this LOOP_END. */
5397 for(i
=0; i
<pAsm
->fc_stack
[pAsm
->FCSP
].unNumMid
; i
++)
5399 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[i
]->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
;
5401 if(NULL
!= pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
5403 FREE(pAsm
->fc_stack
[pAsm
->FCSP
].mid
);
5407 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_LOOP
)
5409 radeon_error("loop/endloop in shader code are not paired. \n");
/* Propagate a pending return-from-inside-a-loop outwards. */
5415 if((pAsm
->unCFflags
& HAS_CURRENT_LOOPRET
) > 0)
5417 for(unFCSP
=(pAsm
->FCSP
-1); unFCSP
>pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
; unFCSP
--)
5419 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5421 breakLoopOnFlag(pAsm
, unFCSP
);
5424 else if(FC_IF
== pAsm
->fc_stack
[unFCSP
].type
)
5429 if(unFCSP
<= pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
)
5431 #ifdef USE_CF_FOR_POP_AFTER
5432 returnOnFlag(pAsm
, unIF
);
5434 returnOnFlag(pAsm
, 0);
5435 #endif /* USE_CF_FOR_POP_AFTER */
5436 pAsm
->unCFflags
&= ~HAS_CURRENT_LOOPRET
;
5442 decreaseCurrent(pAsm
, FC_LOOP
);
5447 void add_return_inst(r700_AssemblerBase
*pAsm
)
5449 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5453 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5454 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5455 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5456 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5458 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5459 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5460 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_RETURN
;
5461 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5463 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/*
 * assemble_BGNSUB: begin assembling a subroutine that starts at IL
 * instruction nILindex.
 *
 * Visible steps:
 *  - grow pAsm->subs by 10 entries when it is full (NULL result checked);
 *  - initialise the new sub descriptor: IL offset and an empty local CF
 *    instruction list;
 *  - fill the call-stack entry at pAsm->CALLSP (descriptor index, FCSP at
 *    entry, pointer to the local CF list, max/current depth reset to 0) and
 *    make that list the shader's active CF list;
 *  - advance unSubArrayPointer, reset alu_x_opcode, mark the flow-control
 *    stack entry as FC_REP and account for it with checkStackDepth().
 *
 * Returns GLboolean.
 * NOTE(review): this extract omits short source lines; any statements
 * advancing pAsm->CALLSP or pAsm->FCSP are not visible -- confirm in the
 * full file.
 */
5466 GLboolean
assemble_BGNSUB(r700_AssemblerBase
*pAsm
, GLint nILindex
)
5469 if( (pAsm
->unSubArrayPointer
+ 1) > pAsm
->unSubArraySize
)
5471 pAsm
->subs
= (SUB_OFFSET
*)_mesa_realloc( (void *)pAsm
->subs
,
5472 sizeof(SUB_OFFSET
) * pAsm
->unSubArraySize
,
5473 sizeof(SUB_OFFSET
) * (pAsm
->unSubArraySize
+ 10) );
5474 if(NULL
== pAsm
->subs
)
5478 pAsm
->unSubArraySize
+= 10;
/* New sub descriptor: IL start offset and an empty local CF list. */
5481 pAsm
->subs
[pAsm
->unSubArrayPointer
].subIL_Offset
= nILindex
;
5482 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.pHead
=NULL
;
5483 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.pTail
=NULL
;
5484 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.uNumOfNode
=0;
/* Call-stack entry for this subroutine; CF output is redirected to it. */
5487 pAsm
->CALLSTACK
[pAsm
->CALLSP
].subDescIndex
= pAsm
->unSubArrayPointer
;
5488 pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
= pAsm
->FCSP
;
5489 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
5490 = &(pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
);
5491 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= 0;
5492 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
= 0;
5493 SetActiveCFlist(pAsm
->pR700Shader
,
5494 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
);
5496 pAsm
->unSubArrayPointer
++;
5499 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
5502 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_REP
;
5504 checkStackDepth(pAsm
, FC_REP
, GL_FALSE
);
/*
 * assemble_ENDSUB: finish the subroutine currently being assembled.
 *
 * Visible steps: report an error if the flow-control stack entry is not
 * FC_REP; copy the call-stack entry's `max` depth into the sub descriptor's
 * unStackDepthMax; undo the depth accounting with
 * decreaseCurrent(pAsm, FC_REP); make the CF list of
 * CALLSTACK[CALLSP] the active one; reset alu_x_opcode.
 *
 * Returns GLboolean.
 * NOTE(review): this extract omits short source lines; any statements
 * popping pAsm->CALLSP or pAsm->FCSP are not visible, so which call-stack
 * entry SetActiveCFlist() sees must be confirmed in the full file.
 */
5509 GLboolean
assemble_ENDSUB(r700_AssemblerBase
*pAsm
)
5511 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_REP
)
5513 radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
5517 /* copy max to sub structure */
5518 pAsm
->subs
[pAsm
->CALLSTACK
[pAsm
->CALLSP
].subDescIndex
].unStackDepthMax
5519 = pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
;
5521 decreaseCurrent(pAsm
, FC_REP
);
5524 SetActiveCFlist(pAsm
->pR700Shader
,
5525 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
);
5527 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
/*
 * assemble_RET: assemble a return from the current subroutine.
 *
 * Visible steps: when inside a call (CALLSP > 0) the flow-control entries
 * opened since the subroutine was entered are walked from the top; for an
 * enclosing FC_LOOP the return flag is set with
 * setRetInLoopFlag(pAsm, SQ_SEL_1), breakLoopOnFlag() is emitted for that
 * loop and LOOPRET_FLAGS is OR-ed into unCFflags.  A RETURN instruction is
 * emitted with add_return_inst().
 *
 * Returns GLboolean.
 * NOTE(review): this extract omits short source lines -- the FC_IF branch
 * body, the content of the USE_CF_FOR_POP_AFTER section, and whether
 * add_return_inst() is reached after a loop was found are not visible;
 * confirm in the full file.
 */
5534 GLboolean
assemble_RET(r700_AssemblerBase
*pAsm
)
5538 if(pAsm
->CALLSP
> 0)
5541 for(unFCSP
=pAsm
->FCSP
; unFCSP
>pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
; unFCSP
--)
5543 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5545 setRetInLoopFlag(pAsm
, SQ_SEL_1
);
5546 breakLoopOnFlag(pAsm
, unFCSP
);
5547 pAsm
->unCFflags
|= LOOPRET_FLAGS
;
5551 else if(FC_IF
== pAsm
->fc_stack
[unFCSP
].type
)
5558 #ifdef USE_CF_FOR_POP_AFTER
5563 #endif /* USE_CF_FOR_POP_AFTER */
5565 add_return_inst(pAsm
);
/*
 * assemble_CAL: assemble a call to the subroutine that starts at IL
 * instruction nILindex.
 *
 * Visible steps:
 *  - reset alu_x_opcode and add a CF instruction set up as SQ_CF_INST_CALL
 *    (call_count 1, barrier set);
 *  - grow pAsm->callers by 10 entries when it is full (NULL result checked)
 *    and record the callee's IL offset and the CALL clause;
 *  - look the callee up in pAsm->subs: if it was assembled before, raise the
 *    caller's `max` depth to callee depth + current depth and store the sub
 *    index in the caller record;
 *  - otherwise store the next free sub index, assemble the subroutine by
 *    calling AssembleInstr() from nILindex, and then do the same depth
 *    update with the new sub's unStackDepthMax.
 *
 * Returns GLboolean.
 * NOTE(review): this extract omits short source lines, including the
 * parameter line that presumably declares nILindex, the declarations of j,
 * max, unSubID and bRet, and the handling of AssembleInstr()'s result --
 * confirm in the full file.
 */
5570 GLboolean
assemble_CAL(r700_AssemblerBase
*pAsm
,
5572 GLuint uiNumberInsts
,
5573 struct prog_instruction
*pILInst
)
5575 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
5577 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5582 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.call_count
= 1;
5583 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5584 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5585 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5587 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5588 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5589 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_CALL
;
5590 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5592 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Record this call site so the CALL clause can be found again later. */
5595 if( (pAsm
->unCallerArrayPointer
+ 1) > pAsm
->unCallerArraySize
)
5597 pAsm
->callers
= (CALLER_POINTER
*)_mesa_realloc( (void *)pAsm
->callers
,
5598 sizeof(CALLER_POINTER
) * pAsm
->unCallerArraySize
,
5599 sizeof(CALLER_POINTER
) * (pAsm
->unCallerArraySize
+ 10) );
5600 if(NULL
== pAsm
->callers
)
5604 pAsm
->unCallerArraySize
+= 10;
5607 pAsm
->callers
[pAsm
->unCallerArrayPointer
].subIL_Offset
= nILindex
;
5608 pAsm
->callers
[pAsm
->unCallerArrayPointer
].cf_ptr
= pAsm
->cf_current_cf_clause_ptr
;
5610 pAsm
->unCallerArrayPointer
++;
/* Was the callee already assembled? */
5616 for(j
=0; j
<pAsm
->unSubArrayPointer
; j
++)
5618 if(nILindex
== pAsm
->subs
[j
].subIL_Offset
)
5619 { /* compiled before */
5621 max
= pAsm
->subs
[j
].unStackDepthMax
5622 + pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
5623 if(max
> pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5625 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= max
;
5628 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].subDescIndex
= j
;
/* Not assembled yet: it will take the next free sub index. */
5633 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].subDescIndex
= pAsm
->unSubArrayPointer
;
5634 unSubID
= pAsm
->unSubArrayPointer
;
5636 bRet
= AssembleInstr(nILindex
, uiNumberInsts
, pILInst
, pAsm
);
5640 max
= pAsm
->subs
[unSubID
].unStackDepthMax
5641 + pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
5642 if(max
> pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5644 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= max
;
/*
 * setRetInLoopFlag: emit a MOV that writes the X channel of the flag
 * register (pAsm->flag_reg_index).
 *
 * Two source setups are visible back to back: a literal source
 * (SRC_REC_LITERAL, finished with next_ins_literal() and fLiteral), and a
 * register source with every swizzle set to flagValue (finished with
 * next_ins2()); callers in this file pass SQ_SEL_1 or SQ_SEL_0 as flagValue.
 *
 * Returns GLboolean; both guarded calls are compared against GL_FALSE.
 * NOTE(review): this extract omits short source lines, so any
 * conditional-compilation directives choosing between the two setups are
 * not visible -- confirm which one is actually built.
 */
5651 GLboolean
setRetInLoopFlag(r700_AssemblerBase
*pAsm
, GLuint flagValue
)
5653 GLfloat fLiteral
[2] = {0.1, 0.0};
/* Destination: flag register, X channel only. */
5655 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
5656 pAsm
->D
.dst
.op3
= 0;
5657 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5658 pAsm
->D
.dst
.reg
= pAsm
->flag_reg_index
;
5659 pAsm
->D
.dst
.writex
= 1;
5660 pAsm
->D
.dst
.writey
= 0;
5661 pAsm
->D
.dst
.writez
= 0;
5662 pAsm
->D
.dst
.writew
= 0;
5663 pAsm
->D2
.dst2
.literal
= 1;
5664 pAsm
->D2
.dst2
.SaturateMode
= SATURATE_OFF
;
5665 pAsm
->D
.dst
.predicated
= 0;
/* Source setup, literal variant. */
5667 pAsm
->S
[0].src
.rtype
= SRC_REC_LITERAL
;
5668 //pAsm->S[0].src.reg = 0;
5669 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5670 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5671 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
5672 pAsm
->S
[0].src
.swizzley
= SQ_SEL_Y
;
5673 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_Z
;
5674 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_W
;
5676 if( GL_FALSE
== next_ins_literal(pAsm
, &(fLiteral
[0])) )
/* Source setup, register variant: flagValue is used as the swizzle select. */
5681 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
5682 pAsm
->S
[0].src
.reg
= 0;
5683 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5684 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5685 pAsm
->S
[0].src
.swizzlex
= flagValue
;
5686 pAsm
->S
[0].src
.swizzley
= flagValue
;
5687 pAsm
->S
[0].src
.swizzlez
= flagValue
;
5688 pAsm
->S
[0].src
.swizzlew
= flagValue
;
5690 if( GL_FALSE
== next_ins2(pAsm
) )
/*
 * testFlag: emit a predicated PRED_SETE comparison on the flag register
 * (pAsm->flag_reg_index) with alu_x_opcode set to
 * SQ_CF_INST_ALU_PUSH_BEFORE; the result goes to the X channel of a helper
 * register.  Afterwards the stack-depth watermark is checked with
 * checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE).
 *
 * Source 0 is the flag register.  For source 1 two setups are visible back
 * to back: a literal source (finished with next_ins_literal() and fLiteral)
 * and a register source with every swizzle set to SQ_SEL_1 (finished with
 * next_ins2()).
 *
 * Returns GLboolean; both guarded calls are compared against GL_FALSE.
 * NOTE(review): this extract omits short source lines, so any
 * conditional-compilation directives choosing between the two source-1
 * setups are not visible -- confirm which one is actually built.
 */
5699 GLboolean
testFlag(r700_AssemblerBase
*pAsm
)
5701 GLfloat fLiteral
[2] = {0.1, 0.0};
5704 GLuint tmp
= gethelpr(pAsm
);
5705 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5707 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_PRED_SETE
;
5708 pAsm
->D
.dst
.math
= 1;
5709 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5710 pAsm
->D
.dst
.reg
= tmp
;
5711 pAsm
->D
.dst
.writex
= 1;
5712 pAsm
->D
.dst
.writey
= 0;
5713 pAsm
->D
.dst
.writez
= 0;
5714 pAsm
->D
.dst
.writew
= 0;
5715 pAsm
->D2
.dst2
.literal
= 1;
5716 pAsm
->D2
.dst2
.SaturateMode
= SATURATE_OFF
;
5717 pAsm
->D
.dst
.predicated
= 1;
/* Source 0: the flag register, unswizzled. */
5719 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
5720 pAsm
->S
[0].src
.reg
= pAsm
->flag_reg_index
;
5721 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5722 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5723 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
5724 pAsm
->S
[0].src
.swizzley
= SQ_SEL_Y
;
5725 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_Z
;
5726 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_W
;
/* Source 1, literal variant. */
5728 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
5729 //pAsm->S[1].src.reg = 0;
5730 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
5731 noneg_PVSSRC(&(pAsm
->S
[1].src
));
5732 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
5733 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
5734 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
5735 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
5737 if( GL_FALSE
== next_ins_literal(pAsm
, &(fLiteral
[0])) )
/* Source 1, register variant: every swizzle selects SQ_SEL_1. */
5742 pAsm
->S
[1].src
.rtype
= DST_REG_TEMPORARY
;
5743 pAsm
->S
[1].src
.reg
= 0;
5744 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
5745 noneg_PVSSRC(&(pAsm
->S
[1].src
));
5746 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_1
;
5747 pAsm
->S
[1].src
.swizzley
= SQ_SEL_1
;
5748 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_1
;
5749 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_1
;
5751 if( GL_FALSE
== next_ins2(pAsm
) )
5757 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
/*
 * returnOnFlag: emit the sequence that returns from the subroutine when the
 * return flag is set.
 *
 * Visible calls, in order: a JUMP with pop_count 1 and relative offset 4,
 * setRetInLoopFlag(pAsm, SQ_SEL_0), a POP of unIF + 1 entries, and a RETURN
 * instruction.  None of their results is checked here.
 *
 * Parameters: pAsm - assembler state; unIF - added to 1 to form the POP
 * count.  Returns GLboolean.
 * NOTE(review): this extract omits short source lines; a statement before
 * the jump (line 5764 of the original) is not visible -- confirm in the full
 * file.
 */
5762 GLboolean
returnOnFlag(r700_AssemblerBase
*pAsm
, GLuint unIF
)
5765 jumpToOffest(pAsm
, 1, 4);
5766 setRetInLoopFlag(pAsm
, SQ_SEL_0
);
5767 pops(pAsm
, unIF
+ 1);
5768 add_return_inst(pAsm
);
/*
 * breakLoopOnFlag: emit a LOOP_BREAK for the loop recorded at flow-control
 * stack index unFCSP.
 *
 * Visible steps: add a CF instruction set up as SQ_CF_INST_LOOP_BREAK
 * (pop_count 1, barrier set), grow that loop's `mid` array by one element
 * and append the new clause to it, so that assemble_ENDLOOP can patch its
 * address.
 *
 * Returns GLboolean.
 * NOTE(review): the array returned by _mesa_realloc() is indexed on the very
 * next line with no NULL check.  This extract also omits short source lines
 * before the first statement and after the append -- confirm in the full
 * file.
 */
5773 GLboolean
breakLoopOnFlag(r700_AssemblerBase
*pAsm
, GLuint unFCSP
)
5778 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5783 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5784 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5785 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5787 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5788 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5789 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_BREAK
;
5790 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5792 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Append the break clause to the loop's list of clauses to patch. */
5794 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
5795 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
5796 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
5797 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
5798 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
5799 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
/*
 * AssembleInstr
 *
 * Walks the IL instructions pILInst[uiFirstInst .. uiNumberInsts - 1],
 * recording the array in pR700AsmCode->pILInst and the current index in
 * pR700AsmCode->uiCurInst, and dispatches on pILInst[i].Opcode to the
 * matching assemble_* helper.
 *
 * uiNumberInsts is used as the exclusive upper bound of the loop index,
 * not as a count relative to uiFirstInst.
 *
 * Comparison handling looks ahead at pILInst[i + 1].Opcode:
 *   - OPCODE_IF / OPCODE_BRK / OPCODE_CONT: alu_x_opcode is set and a
 *     predicate compare is emitted through assemble_LOGIC_PRED();
 *   - OPCODE_KIL / OPCODE_KIL_NV: the compare is emitted through
 *     assemble_KIL();
 *   - the remaining visible path uses assemble_LOGIC() (or
 *     assemble_SGE()).
 * Two of the comparison groups swap pILInst[i].SrcReg[0] and SrcReg[1]
 * around the helper call and put the saved operands back afterwards, so
 * that the SETGT / SETGE compares can be reused with reversed sources.
 *
 * When USE_CF_FOR_CONTINUE_BREAK is not defined, an instruction that is
 * followed by OPCODE_BRK first has its opcode rewritten to another
 * comparison opcode (SGT, SGE, SLT, SLE, SNE or SEQ).
 *
 * NOTE(review): pILInst[i + 1] is read in these paths with no visible
 * check that i + 1 < uiNumberInsts; presumably the caller guarantees a
 * trailing instruction (e.g. OPCODE_END) - verify against callers.
 * NOTE(review): case labels, braces and most return statements are not
 * visible in this listing; confirm the opcode-to-helper mapping and the
 * returned values against the full file.
 */
5806 GLboolean
AssembleInstr(GLuint uiFirstInst
,
5807 GLuint uiNumberInsts
,
5808 struct prog_instruction
*pILInst
,
5809 r700_AssemblerBase
*pR700AsmCode
)
5813 pR700AsmCode
->pILInst
= pILInst
;
5814 for(i
=uiFirstInst
; i
<uiNumberInsts
; i
++)
5816 pR700AsmCode
->uiCurInst
= i
;
5818 #ifndef USE_CF_FOR_CONTINUE_BREAK
5819 if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
5821 switch(pILInst
[i
].Opcode
)
5824 pILInst
[i
].Opcode
= OPCODE_SGT
;
5827 pILInst
[i
].Opcode
= OPCODE_SGE
;
5830 pILInst
[i
].Opcode
= OPCODE_SLT
;
5833 pILInst
[i
].Opcode
= OPCODE_SLE
;
5836 pILInst
[i
].Opcode
= OPCODE_SNE
;
5839 pILInst
[i
].Opcode
= OPCODE_SEQ
;
5847 switch (pILInst
[i
].Opcode
)
5850 if ( GL_FALSE
== assemble_ABS(pR700AsmCode
) )
5855 if ( GL_FALSE
== assemble_ADD(pR700AsmCode
) )
5860 if ( GL_FALSE
== assemble_ARL(pR700AsmCode
) )
5864 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
5865 //if ( GL_FALSE == assemble_BAD("ARR") )
5870 if ( GL_FALSE
== assemble_CMP(pR700AsmCode
) )
5874 if ( GL_FALSE
== assemble_COS(pR700AsmCode
) )
5881 if ( GL_FALSE
== assemble_DOT(pR700AsmCode
) )
5886 if ( GL_FALSE
== assemble_DST(pR700AsmCode
) )
5891 if ( GL_FALSE
== assemble_EX2(pR700AsmCode
) )
5895 if ( GL_FALSE
== assemble_EXP(pR700AsmCode
) )
5900 if ( GL_FALSE
== assemble_FLR(pR700AsmCode
) )
5903 //case OP_FLR_INT: ;
5905 // if ( GL_FALSE == assemble_FLR_INT() )
5910 if ( GL_FALSE
== assemble_FRC(pR700AsmCode
) )
5916 /* done at OPCODE_SE/SGT...etc. */
5917 /* if ( GL_FALSE == assemble_KIL(pR700AsmCode) )
5921 if ( GL_FALSE
== assemble_LG2(pR700AsmCode
) )
5925 if ( GL_FALSE
== assemble_LIT(pR700AsmCode
) )
5929 if ( GL_FALSE
== assemble_LRP(pR700AsmCode
) )
5933 if ( GL_FALSE
== assemble_LOG(pR700AsmCode
) )
5938 if ( GL_FALSE
== assemble_MAD(pR700AsmCode
) )
5942 if ( GL_FALSE
== assemble_MAX(pR700AsmCode
) )
5946 if ( GL_FALSE
== assemble_MIN(pR700AsmCode
) )
5951 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
5955 if ( GL_FALSE
== assemble_MUL(pR700AsmCode
) )
5960 if ( GL_FALSE
== assemble_POW(pR700AsmCode
) )
5964 if ( GL_FALSE
== assemble_RCP(pR700AsmCode
) )
5968 if ( GL_FALSE
== assemble_RSQ(pR700AsmCode
) )
5972 if ( GL_FALSE
== assemble_SIN(pR700AsmCode
) )
5976 if ( GL_FALSE
== assemble_SCS(pR700AsmCode
) )
5981 if(OPCODE_IF
== pILInst
[i
+1].Opcode
)
5983 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5984 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETE
) )
5989 else if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
5991 #ifdef USE_CF_FOR_CONTINUE_BREAK
5992 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5994 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_BREAK
;
5996 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETE
) )
6001 else if(OPCODE_CONT
== pILInst
[i
+1].Opcode
)
6003 #ifdef USE_CF_FOR_CONTINUE_BREAK
6004 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6006 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_CONTINUE
;
6008 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETE
) )
6013 else if((OPCODE_KIL
== pILInst
[i
+1].Opcode
)||(OPCODE_KIL_NV
== pILInst
[i
+1].Opcode
))
6015 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
, SQ_OP2_INST_KILLE
) )
6022 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETE
) )
6030 if(OPCODE_IF
== pILInst
[i
+1].Opcode
)
6032 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6033 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGT
) )
6038 else if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
6040 #ifdef USE_CF_FOR_CONTINUE_BREAK
6041 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6043 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_BREAK
;
6045 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGT
) )
6050 else if(OPCODE_CONT
== pILInst
[i
+1].Opcode
)
6052 #ifdef USE_CF_FOR_CONTINUE_BREAK
6053 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6055 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_CONTINUE
;
6058 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGT
) )
6063 else if((OPCODE_KIL
== pILInst
[i
+1].Opcode
)||(OPCODE_KIL_NV
== pILInst
[i
+1].Opcode
))
6065 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
, SQ_OP2_INST_KILLGT
) )
6072 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGT
) )
6080 if(OPCODE_IF
== pILInst
[i
+1].Opcode
)
6082 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6083 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGE
) )
6088 else if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
6090 #ifdef USE_CF_FOR_CONTINUE_BREAK
6091 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6093 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_BREAK
;
6095 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGE
) )
6100 else if(OPCODE_CONT
== pILInst
[i
+1].Opcode
)
6102 #ifdef USE_CF_FOR_CONTINUE_BREAK
6103 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6105 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_CONTINUE
;
6108 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGE
) )
6113 else if((OPCODE_KIL
== pILInst
[i
+1].Opcode
)||(OPCODE_KIL_NV
== pILInst
[i
+1].Opcode
))
6115 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
, SQ_OP2_INST_KILLGE
) )
6122 if ( GL_FALSE
== assemble_SGE(pR700AsmCode
) )
6129 /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
6132 struct prog_src_register SrcRegSave
[2];
6133 SrcRegSave
[0] = pILInst
[i
].SrcReg
[0];
6134 SrcRegSave
[1] = pILInst
[i
].SrcReg
[1];
6135 pILInst
[i
].SrcReg
[0] = SrcRegSave
[1];
6136 pILInst
[i
].SrcReg
[1] = SrcRegSave
[0];
6137 if(OPCODE_IF
== pILInst
[i
+1].Opcode
)
6139 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6140 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGT
) )
6142 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6143 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6147 else if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
6149 #ifdef USE_CF_FOR_CONTINUE_BREAK
6150 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6152 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_BREAK
;
6154 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGT
) )
6156 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6157 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6161 else if(OPCODE_CONT
== pILInst
[i
+1].Opcode
)
6163 #ifdef USE_CF_FOR_CONTINUE_BREAK
6164 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6166 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_CONTINUE
;
6169 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGT
) )
6171 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6172 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6176 else if((OPCODE_KIL
== pILInst
[i
+1].Opcode
)||(OPCODE_KIL_NV
== pILInst
[i
+1].Opcode
))
6178 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
, SQ_OP2_INST_KILLGT
) )
6185 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGT
) )
6187 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6188 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6192 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6193 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6199 struct prog_src_register SrcRegSave
[2];
6200 SrcRegSave
[0] = pILInst
[i
].SrcReg
[0];
6201 SrcRegSave
[1] = pILInst
[i
].SrcReg
[1];
6202 pILInst
[i
].SrcReg
[0] = SrcRegSave
[1];
6203 pILInst
[i
].SrcReg
[1] = SrcRegSave
[0];
6204 if(OPCODE_IF
== pILInst
[i
+1].Opcode
)
6206 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6207 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGE
) )
6209 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6210 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6214 else if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
6216 #ifdef USE_CF_FOR_CONTINUE_BREAK
6217 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6219 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_BREAK
;
6221 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGE
) )
6223 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6224 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6228 else if(OPCODE_CONT
== pILInst
[i
+1].Opcode
)
6230 #ifdef USE_CF_FOR_CONTINUE_BREAK
6231 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6233 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_CONTINUE
;
6236 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETGE
) )
6238 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6239 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6243 else if((OPCODE_KIL
== pILInst
[i
+1].Opcode
)||(OPCODE_KIL_NV
== pILInst
[i
+1].Opcode
))
6245 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
, SQ_OP2_INST_KILLGE
) )
6252 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGE
) )
6254 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6255 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6259 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
6260 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
6265 if(OPCODE_IF
== pILInst
[i
+1].Opcode
)
6267 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6268 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETNE
) )
6273 else if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
6275 #ifdef USE_CF_FOR_CONTINUE_BREAK
6276 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6278 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_BREAK
;
6280 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETNE
) )
6285 else if(OPCODE_CONT
== pILInst
[i
+1].Opcode
)
6287 #ifdef USE_CF_FOR_CONTINUE_BREAK
6288 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6290 pR700AsmCode
->alu_x_opcode
= SQ_CF_INST_ALU_CONTINUE
;
6292 if ( GL_FALSE
== assemble_LOGIC_PRED(pR700AsmCode
, SQ_OP2_INST_PRED_SETNE
) )
6297 else if((OPCODE_KIL
== pILInst
[i
+1].Opcode
)||(OPCODE_KIL_NV
== pILInst
[i
+1].Opcode
))
6299 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
, SQ_OP2_INST_KILLNE
) )
6306 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETNE
) )
6314 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
6319 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
6325 if( (i
+1)<uiNumberInsts
)
6327 if(OPCODE_END
!= pILInst
[i
+1].Opcode
)
6329 if( GL_TRUE
== IsTex(pILInst
[i
+1].Opcode
) )
6331 pR700AsmCode
->pInstDeps
[i
+1].nDstDep
= i
+1; //=1?
6341 if ( GL_FALSE
== assemble_TEX(pR700AsmCode
) )
6346 if ( GL_FALSE
== assemble_math_function(pR700AsmCode
, SQ_OP2_INST_TRUNC
) )
6351 if ( GL_FALSE
== assemble_XPD(pR700AsmCode
) )
6357 GLboolean bHasElse
= GL_FALSE
;
6359 if(pILInst
[pILInst
[i
].BranchTarget
- 1].Opcode
== OPCODE_ELSE
)
6364 if ( GL_FALSE
== assemble_IF(pR700AsmCode
, bHasElse
) )
6372 if ( GL_FALSE
== assemble_ELSE(pR700AsmCode
) )
6377 if ( GL_FALSE
== assemble_ENDIF(pR700AsmCode
) )
6381 case OPCODE_BGNLOOP
:
6382 if( GL_FALSE
== assemble_BGNLOOP(pR700AsmCode
) )
6389 if( GL_FALSE
== assemble_BRK(pR700AsmCode
) )
6396 if( GL_FALSE
== assemble_CONT(pR700AsmCode
) )
6402 case OPCODE_ENDLOOP
:
6403 if( GL_FALSE
== assemble_ENDLOOP(pR700AsmCode
) )
6410 if( GL_FALSE
== assemble_BGNSUB(pR700AsmCode
, i
) )
6417 if( GL_FALSE
== assemble_RET(pR700AsmCode
) )
6424 if( GL_FALSE
== assemble_CAL(pR700AsmCode
,
6425 pILInst
[i
].BranchTarget
,
6433 //case OPCODE_EXPORT:
6434 // if ( GL_FALSE == assemble_EXPORT() )
6439 return assemble_ENDSUB(pR700AsmCode
);
6442 //pR700AsmCode->uiCurInst = i;
6443 //This is a reminder that if a later export includes a depth/stencil
6444 //export, we need a mov to re-arrange DST channel, where using a
6445 //pseudo inst, we will use this end inst to do it.
6449 radeon_error("internal: unknown instruction\n");
/*
 * InitShaderProgram
 *
 * Calls setRetInLoopFlag(pAsm, SQ_SEL_0) on the given assembler state;
 * no other statement is visible in this listing.
 *
 * NOTE(review): the return statement is not visible here; confirm the
 * GLboolean value that is returned.
 */
6457 GLboolean
InitShaderProgram(r700_AssemblerBase
* pAsm
)
6459 setRetInLoopFlag(pAsm
, SQ_SEL_0
);
/*
 * RelocProgram
 *
 * Post-processes the control-flow (CF) instruction lists kept in pAsm.
 * The visible statements do the following:
 *
 *  - When HAS_LOOPRET is not set in pAsm->unCFflags, start at the head
 *    of the main CF list (pAsm->CALLSTACK[0].plstCFInstructions_local)
 *    and look for a SIT_CF_ALU instruction. A test on its count field
 *    (== 1) is followed by code that sets cf_inst to SQ_CF_INST_NOP,
 *    and by code that unlinks the clause's first ALU instruction
 *    (re-pointing the clause at pNextInst) and decrements count;
 *    presumably these are the two arms of an if/else - TODO confirm.
 *  - Test pAsm->unSubArrayPointer against 0.
 *  - When pAsm->CALLSTACK[0].max > 0, set pR700Shader->uStackSize to
 *    ((max + 3) >> 2) + 2.
 *  - For each of the unSubArrayPointer entries of pAsm->subs: store the
 *    running unCFoffset (initially the main list's node count) in the
 *    entry; add unCFoffset to the addr field of SIT_CF_GENERIC
 *    instructions whose cf_inst is POP, JUMP, ELSE, LOOP_END,
 *    LOOP_START, LOOP_START_NO_AL, LOOP_CONTINUE or LOOP_BREAK; add
 *    unCFoffset to pInst->m_uIndex; splice the sub's list onto the tail
 *    of the main list; advance unCFoffset by the sub's node count.
 *  - For each of the unCallerArrayPointer entries of pAsm->callers: set
 *    the addr of the recorded CF instruction to the unCFoffset of the
 *    sub selected by subDescIndex.
 *
 * NOTE(review): loop headers, else branches, breaks and return
 * statements are not visible in this listing; confirm the exact
 * nesting and the returned values against the full file.
 */
6463 GLboolean
RelocProgram(r700_AssemblerBase
* pAsm
)
6467 TypedShaderList
* plstCFmain
;
6468 TypedShaderList
* plstCFsub
;
6470 R700ShaderInstruction
* pInst
;
6471 R700ControlFlowGenericClause
* pCFInst
;
6473 plstCFmain
= pAsm
->CALLSTACK
[0].plstCFInstructions_local
;
6475 /* remove flags init if they are not used */
6476 if((pAsm
->unCFflags
& HAS_LOOPRET
) == 0)
6478 R700ControlFlowALUClause
* pCF_ALU
;
6479 pInst
= plstCFmain
->pHead
;
6482 if(SIT_CF_ALU
== pInst
->m_ShaderInstType
)
6484 pCF_ALU
= (R700ControlFlowALUClause
*)pInst
;
6485 if(1 == pCF_ALU
->m_Word1
.f
.count
)
6487 pCF_ALU
->m_Word1
.f
.cf_inst
= SQ_CF_INST_NOP
;
6491 R700ALUInstruction
* pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
6493 pALU
->m_pLinkedALUClause
= NULL
;
6494 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
6495 pALU
->m_pLinkedALUClause
= pCF_ALU
;
6496 pCF_ALU
->m_pLinkedALUInstruction
= pALU
;
6498 pCF_ALU
->m_Word1
.f
.count
--;
6502 pInst
= pInst
->pNextInst
;
6506 if(0 == pAsm
->unSubArrayPointer
)
6511 if(pAsm
->CALLSTACK
[0].max
> 0)
6513 pAsm
->pR700Shader
->uStackSize
= ((pAsm
->CALLSTACK
[0].max
+ 3)>>2) + 2;
6516 unCFoffset
= plstCFmain
->uNumOfNode
;
6519 for(i
=0; i
<pAsm
->unSubArrayPointer
; i
++)
6521 pAsm
->subs
[i
].unCFoffset
= unCFoffset
;
6522 plstCFsub
= &(pAsm
->subs
[i
].lstCFInstructions_local
);
6524 pInst
= plstCFsub
->pHead
;
6526 /* reloc instructions */
6529 if(SIT_CF_GENERIC
== pInst
->m_ShaderInstType
)
6531 pCFInst
= (R700ControlFlowGenericClause
*)pInst
;
6533 switch (pCFInst
->m_Word1
.f
.cf_inst
)
6535 case SQ_CF_INST_POP
:
6536 case SQ_CF_INST_JUMP
:
6537 case SQ_CF_INST_ELSE
:
6538 case SQ_CF_INST_LOOP_END
:
6539 case SQ_CF_INST_LOOP_START
:
6540 case SQ_CF_INST_LOOP_START_NO_AL
:
6541 case SQ_CF_INST_LOOP_CONTINUE
:
6542 case SQ_CF_INST_LOOP_BREAK
:
6543 pCFInst
->m_Word0
.f
.addr
+= unCFoffset
;
6550 pInst
->m_uIndex
+= unCFoffset
;
6552 pInst
= pInst
->pNextInst
;
6555 /* Put sub into main */
6556 plstCFmain
->pTail
->pNextInst
= plstCFsub
->pHead
;
6557 plstCFmain
->pTail
= plstCFsub
->pTail
;
6558 plstCFmain
->uNumOfNode
+= plstCFsub
->uNumOfNode
;
6560 unCFoffset
+= plstCFsub
->uNumOfNode
;
6564 for(i
=0; i
<pAsm
->unCallerArrayPointer
; i
++)
6566 pAsm
->callers
[i
].cf_ptr
->m_Word0
.f
.addr
6567 = pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].unCFoffset
;
/*
 * Process_Export
 *
 * Sets up one export CF clause. check_current_clause() is called with
 * CF_EMPTY_CLAUSE and then CF_EXPORT_CLAUSE (the latter allocates
 * pAsm->cf_current_export_clause_ptr, per the original comment), after
 * which the clause words are filled in:
 *
 *  - m_Word0.type is set from "type" and array_base is chosen by it:
 *      SQ_EXPORT_PIXEL: SQ_CF_PIXEL_Z when is_depth_export is GL_TRUE,
 *                       otherwise SQ_CF_PIXEL_MRT0 + export_starting_index;
 *      a further case:  SQ_CF_POS_0 + export_starting_index (its label
 *                       is not visible; presumably the position export
 *                       type - TODO confirm);
 *      SQ_EXPORT_PARAM: 0 + export_starting_index;
 *      and an "Unknown export type" error is reported through
 *      radeon_error() for unrecognised values.
 *  - rw_gpr = starting_register_number, rw_rel = SQ_ABSOLUTE,
 *    index_gpr = 0, elem_size = 3.
 *  - m_Word1: burst_count = export_count - 1, cf_inst =
 *    SQ_CF_INST_EXPORT, barrier = 1, end_of_program / valid_pixel_mode /
 *    whole_quad_mode = 0.
 *  - Swizzle: when export_count == 1 the write mask is read from
 *    pAsm->pucOutMask[starting_register_number -
 *    pAsm->starting_export_register_number] and each of its low four
 *    bits selects either the matching channel (SQ_SEL_X..W) or
 *    SQ_SEL_MASK. The other visible path writes the identity swizzle
 *    X, Y, Z, W.
 *  - pAsm->cf_last_export_ptr is pointed at the clause just written.
 *
 * NOTE(review): the declaration of the "type" parameter, the statement
 * executed for depth exports in the single-export path, and the return
 * statements are not visible in this listing.
 * NOTE(review): export_count is unsigned, so export_count - 1 wraps if
 * a caller passes 0 - verify callers never do.
 */
6573 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
6575 GLuint export_starting_index
,
6576 GLuint export_count
,
6577 GLuint starting_register_number
,
6578 GLboolean is_depth_export
)
6580 unsigned char ucWriteMask
;
6582 check_current_clause(pAsm
, CF_EMPTY_CLAUSE
);
6583 check_current_clause(pAsm
, CF_EXPORT_CLAUSE
); //alloc the cf_current_export_clause_ptr
6585 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.type
= type
;
6589 case SQ_EXPORT_PIXEL
:
6590 if(GL_TRUE
== is_depth_export
)
6592 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_Z
;
6596 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_MRT0
+ export_starting_index
;
6601 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_POS_0
+ export_starting_index
;
6604 case SQ_EXPORT_PARAM
:
6605 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= 0x0 + export_starting_index
;
6609 radeon_error("Unknown export type: %d\n", type
);
6614 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_gpr
= starting_register_number
;
6616 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_rel
= SQ_ABSOLUTE
;
6617 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.index_gpr
= 0x0;
6618 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.elem_size
= 0x3;
6620 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.burst_count
= (export_count
- 1);
6621 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6622 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6623 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT
; // _DONE
6624 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6625 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
6627 if (export_count
== 1)
6629 ucWriteMask
= pAsm
->pucOutMask
[starting_register_number
- pAsm
->starting_export_register_number
];
6630 /* exports Z as a float into Red channel */
6631 if (GL_TRUE
== is_depth_export
)
6634 if( (ucWriteMask
& 0x1) != 0)
6636 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
6640 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_MASK
;
6642 if( ((ucWriteMask
>>1) & 0x1) != 0)
6644 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
6648 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_MASK
;
6650 if( ((ucWriteMask
>>2) & 0x1) != 0)
6652 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
6656 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_MASK
;
6658 if( ((ucWriteMask
>>3) & 0x1) != 0)
6660 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
6664 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_MASK
;
6669 // This should only be used if all components for all registers have been written
6670 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
6671 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
6672 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
6673 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
6676 pAsm
->cf_last_export_ptr
= pAsm
->cf_current_export_clause_ptr
;
/*
 * Move_Depth_Exports_To_Correct_Channels
 *
 * Emits a MOV that copies the channel selected by depth_channel_select
 * of the depth export register into the X channel of that same register
 * (the hardware expects depth in the R channel, per the original
 * comment).
 *
 * The opcode of the current IL instruction (pAsm->pILInst[uiCurInst],
 * expected to be OPCODE_END per the original comment) is temporarily
 * overwritten with OPCODE_MOV while the instruction is emitted through
 * next_ins(), and the saved opcode is written back afterwards.
 *
 *   pAsm                 - assembler state; depth_export_register_number
 *                          names both source and destination register.
 *   depth_channel_select - swizzle value passed to setswizzle_PVSSRC()
 *                          for source operand 0.
 *
 * Only dst.writex is written here; the other write-enable bits keep
 * whatever value they already had.
 * NOTE(review): the statements guarded by the next_ins() failure test
 * are not visible in this listing; if that path returns early, the
 * saved opcode would not be written back - confirm.
 */
6681 GLboolean
Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase
*pAsm
, BITS depth_channel_select
)
6683 gl_inst_opcode Opcode_save
= pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
; //Should be OPCODE_END
6684 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= OPCODE_MOV
;
6686 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
6688 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
6690 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
6691 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
6692 pAsm
->D
.dst
.reg
= pAsm
->depth_export_register_number
;
6694 pAsm
->D
.dst
.writex
= 1; // depth goes in R channel for HW
6696 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
6697 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
6698 pAsm
->S
[0].src
.reg
= pAsm
->depth_export_register_number
;
6700 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), depth_channel_select
);
6702 noneg_PVSSRC(&(pAsm
->S
[0].src
));
6704 if( GL_FALSE
== next_ins(pAsm
) )
6709 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= Opcode_save
;
/*
 * Process_Fragment_Exports
 *
 * Emits the export clauses for a fragment program.
 *
 *  - When pR700AsmCode->depth_export_register_number >= 0, the depth
 *    value is first moved with
 *    Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z).
 *  - When the FRAG_RESULT_COLOR bit is set in OutputsWritten, an export
 *    is issued using uiFP_OutputMap[FRAG_RESULT_COLOR]; likewise for
 *    FRAG_RESULT_DEPTH with uiFP_OutputMap[FRAG_RESULT_DEPTH].
 *  - When export_count is still 0, a dummy pixel export
 *    Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE)
 *    is issued because the hardware would otherwise hang (per the
 *    original comment); its result is not checked.
 *  - When cf_last_export_ptr is non-NULL, that clause is turned into
 *    SQ_CF_INST_EXPORT_DONE with end_of_program set.
 *
 *   OutputsWritten - bitfield tested against 1 << FRAG_RESULT_*.
 *
 * NOTE(review): the remaining Process_Export() arguments, the places
 * where export_count is incremented, and the return statements are not
 * visible in this listing.
 */
6714 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
,
6715 GLbitfield OutputsWritten
)
6718 GLuint export_count
= 0;
6720 if(pR700AsmCode
->depth_export_register_number
>= 0)
6722 if( GL_FALSE
== Move_Depth_Exports_To_Correct_Channels(pR700AsmCode
, SQ_SEL_Z
) ) // depth
6728 unBit
= 1 << FRAG_RESULT_COLOR
;
6729 if(OutputsWritten
& unBit
)
6731 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6735 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_COLOR
],
6742 unBit
= 1 << FRAG_RESULT_DEPTH
;
6743 if(OutputsWritten
& unBit
)
6745 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6749 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_DEPTH
],
6756 /* Need to export something, otherwise we'll hang
6757 * results are undefined anyway */
6758 if(export_count
== 0)
6760 Process_Export(pR700AsmCode
, SQ_EXPORT_PIXEL
, 0, 1, 0, GL_FALSE
);
6763 if(pR700AsmCode
->cf_last_export_ptr
!= NULL
)
6765 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6766 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
/*
 * Process_Vertex_Exports
 *
 * Emits the export clauses for a vertex program.
 *
 *  - VERT_RESULT_HPOS: when its bit is set in OutputsWritten, an export
 *    is issued from ucVP_OutputMap[VERT_RESULT_HPOS] and the resulting
 *    clause is marked SQ_CF_INST_EXPORT_DONE.
 *  - pR700AsmCode->number_of_exports is written back from the local
 *    export_count (initialised from that same field).
 *  - VERT_RESULT_COL0, COL1, FOGC, TEX0 + i and every index from
 *    VERT_RESULT_VAR0 up to VERT_RESULT_MAX - 1: for each bit set in
 *    OutputsWritten an export is issued from the matching
 *    ucVP_OutputMap entry at export_starting_index, which is then
 *    incremented.
 *  - The last export clause is marked SQ_CF_INST_EXPORT_DONE. There is
 *    also a path that issues one more export from
 *    starting_export_register_number and forces its swizzle to
 *    (0, 0, 0, 1); the original comment says at least one param should
 *    be exported, so presumably this covers the case where no param
 *    was written - TODO confirm.
 *  - end_of_program is set on the last export clause.
 *
 *   OutputsWritten - bitfield tested against 1 << VERT_RESULT_*.
 *
 * NOTE(review): several Process_Export() arguments, the loop header for
 * the TEX0 range, the unBit assignment inside the VAR0 loop, the
 * conditions selecting the final paths, and the return statements are
 * not visible in this listing.
 * NOTE(review): cf_last_export_ptr is dereferenced in the final
 * statements with no visible NULL check.
 */
6772 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
,
6773 GLbitfield OutputsWritten
)
6778 GLuint export_starting_index
= 0;
6779 GLuint export_count
= pR700AsmCode
->number_of_exports
;
6781 unBit
= 1 << VERT_RESULT_HPOS
;
6782 if(OutputsWritten
& unBit
)
6784 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6786 export_starting_index
,
6788 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_HPOS
],
6796 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6799 pR700AsmCode
->number_of_exports
= export_count
;
6801 unBit
= 1 << VERT_RESULT_COL0
;
6802 if(OutputsWritten
& unBit
)
6804 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6806 export_starting_index
,
6808 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL0
],
6814 export_starting_index
++;
6817 unBit
= 1 << VERT_RESULT_COL1
;
6818 if(OutputsWritten
& unBit
)
6820 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6822 export_starting_index
,
6824 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL1
],
6830 export_starting_index
++;
6833 unBit
= 1 << VERT_RESULT_FOGC
;
6834 if(OutputsWritten
& unBit
)
6836 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6838 export_starting_index
,
6840 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_FOGC
],
6846 export_starting_index
++;
6851 unBit
= 1 << (VERT_RESULT_TEX0
+ i
);
6852 if(OutputsWritten
& unBit
)
6854 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6856 export_starting_index
,
6858 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_TEX0
+ i
],
6864 export_starting_index
++;
6868 for(i
=VERT_RESULT_VAR0
; i
<VERT_RESULT_MAX
; i
++)
6871 if(OutputsWritten
& unBit
)
6873 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6875 export_starting_index
,
6877 pR700AsmCode
->ucVP_OutputMap
[i
],
6883 export_starting_index
++;
6887 // At least one param should be exported
6890 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6894 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6898 pR700AsmCode
->starting_export_register_number
,
6904 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_0
;
6905 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_0
;
6906 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_0
;
6907 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_1
;
6908 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6911 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
6916 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
)
6918 FREE(pR700AsmCode
->pucOutMask
);
6919 FREE(pR700AsmCode
->pInstDeps
);
6921 if(NULL
!= pR700AsmCode
->subs
)
6923 FREE(pR700AsmCode
->subs
);
6925 if(NULL
!= pR700AsmCode
->callers
)
6927 FREE(pR700AsmCode
->callers
);