2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
33 #include "main/mtypes.h"
34 #include "main/imports.h"
36 #include "radeon_debug.h"
37 #include "r600_context.h"
39 #include "r700_assembler.h"
41 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
)
43 return pPVSDST
->addrmode0
| ((BITS
)pPVSDST
->addrmode1
<< 1);
46 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
)
48 pPVSDST
->addrmode0
= addrmode
& 1;
49 pPVSDST
->addrmode1
= (addrmode
>> 1) & 1;
52 void nomask_PVSDST(PVSDST
* pPVSDST
)
54 pPVSDST
->writex
= pPVSDST
->writey
= pPVSDST
->writez
= pPVSDST
->writew
= 1;
57 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
)
59 return pPVSSRC
->addrmode0
| ((BITS
)pPVSSRC
->addrmode1
<< 1);
62 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
)
64 pPVSSRC
->addrmode0
= addrmode
& 1;
65 pPVSSRC
->addrmode1
= (addrmode
>> 1) & 1;
69 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
)
74 pPVSSRC
->swizzlew
= swz
;
77 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
)
79 pPVSSRC
->swizzlex
= SQ_SEL_X
;
80 pPVSSRC
->swizzley
= SQ_SEL_Y
;
81 pPVSSRC
->swizzlez
= SQ_SEL_Z
;
82 pPVSSRC
->swizzlew
= SQ_SEL_W
;
86 swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
)
90 case SQ_SEL_X
: x
= pPVSSRC
->swizzlex
;
92 case SQ_SEL_Y
: x
= pPVSSRC
->swizzley
;
94 case SQ_SEL_Z
: x
= pPVSSRC
->swizzlez
;
96 case SQ_SEL_W
: x
= pPVSSRC
->swizzlew
;
103 case SQ_SEL_X
: y
= pPVSSRC
->swizzlex
;
105 case SQ_SEL_Y
: y
= pPVSSRC
->swizzley
;
107 case SQ_SEL_Z
: y
= pPVSSRC
->swizzlez
;
109 case SQ_SEL_W
: y
= pPVSSRC
->swizzlew
;
116 case SQ_SEL_X
: z
= pPVSSRC
->swizzlex
;
118 case SQ_SEL_Y
: z
= pPVSSRC
->swizzley
;
120 case SQ_SEL_Z
: z
= pPVSSRC
->swizzlez
;
122 case SQ_SEL_W
: z
= pPVSSRC
->swizzlew
;
129 case SQ_SEL_X
: w
= pPVSSRC
->swizzlex
;
131 case SQ_SEL_Y
: w
= pPVSSRC
->swizzley
;
133 case SQ_SEL_Z
: w
= pPVSSRC
->swizzlez
;
135 case SQ_SEL_W
: w
= pPVSSRC
->swizzlew
;
140 pPVSSRC
->swizzlex
= x
;
141 pPVSSRC
->swizzley
= y
;
142 pPVSSRC
->swizzlez
= z
;
143 pPVSSRC
->swizzlew
= w
;
146 void neg_PVSSRC(PVSSRC
* pPVSSRC
)
154 void noneg_PVSSRC(PVSSRC
* pPVSSRC
)
162 // negate argument (for SUB instead of ADD and alike)
163 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
)
165 pPVSSRC
->negx
= !pPVSSRC
->negx
;
166 pPVSSRC
->negy
= !pPVSSRC
->negy
;
167 pPVSSRC
->negz
= !pPVSSRC
->negz
;
168 pPVSSRC
->negw
= !pPVSSRC
->negw
;
171 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
175 case 0: pPVSSRC
->swizzlex
= SQ_SEL_0
; pPVSSRC
->negx
= 0; break;
176 case 1: pPVSSRC
->swizzley
= SQ_SEL_0
; pPVSSRC
->negy
= 0; break;
177 case 2: pPVSSRC
->swizzlez
= SQ_SEL_0
; pPVSSRC
->negz
= 0; break;
178 case 3: pPVSSRC
->swizzlew
= SQ_SEL_0
; pPVSSRC
->negw
= 0; break;
183 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
187 case 0: pPVSSRC
->swizzlex
= SQ_SEL_1
; pPVSSRC
->negx
= 0; break;
188 case 1: pPVSSRC
->swizzley
= SQ_SEL_1
; pPVSSRC
->negy
= 0; break;
189 case 2: pPVSSRC
->swizzlez
= SQ_SEL_1
; pPVSSRC
->negz
= 0; break;
190 case 3: pPVSSRC
->swizzlew
= SQ_SEL_1
; pPVSSRC
->negw
= 0; break;
195 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
)
197 return (pOutVTXFmt0
->point_size
|
198 pOutVTXFmt0
->edge_flag
|
199 pOutVTXFmt0
->rta_index
|
200 pOutVTXFmt0
->kill_flag
|
201 pOutVTXFmt0
->viewport_index
);
204 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
)
206 return (pFPOutFmt
->depth
|
207 pFPOutFmt
->stencil_ref
|
209 pFPOutFmt
->coverage_to_mask
);
212 GLboolean
is_reduction_opcode(PVSDWORD
* dest
)
214 if (dest
->dst
.op3
== 0)
216 if ( (dest
->dst
.opcode
== SQ_OP2_INST_DOT4
|| dest
->dst
.opcode
== SQ_OP2_INST_DOT4_IEEE
|| dest
->dst
.opcode
== SQ_OP2_INST_CUBE
) )
224 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
)
226 GLuint format
= FMT_INVALID
;
227 GLuint uiElemSize
= 0;
232 case GL_UNSIGNED_BYTE
:
237 format
= FMT_8
; break;
239 format
= FMT_8_8
; break;
241 format
= FMT_8_8_8
; break;
243 format
= FMT_8_8_8_8
; break;
249 case GL_UNSIGNED_SHORT
:
255 format
= FMT_16
; break;
257 format
= FMT_16_16
; break;
259 format
= FMT_16_16_16
; break;
261 format
= FMT_16_16_16_16
; break;
267 case GL_UNSIGNED_INT
:
273 format
= FMT_32
; break;
275 format
= FMT_32_32
; break;
277 format
= FMT_32_32_32
; break;
279 format
= FMT_32_32_32_32
; break;
290 format
= FMT_32_FLOAT
; break;
292 format
= FMT_32_32_FLOAT
; break;
294 format
= FMT_32_32_32_FLOAT
; break;
296 format
= FMT_32_32_32_32_FLOAT
; break;
306 format
= FMT_32_FLOAT
; break;
308 format
= FMT_32_32_FLOAT
; break;
310 format
= FMT_32_32_32_FLOAT
; break;
312 format
= FMT_32_32_32_32_FLOAT
; break;
319 //GL_ASSERT_NO_CASE();
322 if(NULL
!= pClient_size
)
324 *pClient_size
= uiElemSize
* nChannels
;
330 unsigned int r700GetNumOperands(r700_AssemblerBase
* pAsm
)
337 switch (pAsm
->D
.dst
.opcode
)
339 case SQ_OP2_INST_ADD
:
340 case SQ_OP2_INST_KILLGT
:
341 case SQ_OP2_INST_MUL
:
342 case SQ_OP2_INST_MAX
:
343 case SQ_OP2_INST_MIN
:
344 //case SQ_OP2_INST_MAX_DX10:
345 //case SQ_OP2_INST_MIN_DX10:
346 case SQ_OP2_INST_SETGT
:
347 case SQ_OP2_INST_SETGE
:
348 case SQ_OP2_INST_PRED_SETE
:
349 case SQ_OP2_INST_PRED_SETGT
:
350 case SQ_OP2_INST_PRED_SETGE
:
351 case SQ_OP2_INST_PRED_SETNE
:
352 case SQ_OP2_INST_DOT4
:
353 case SQ_OP2_INST_DOT4_IEEE
:
354 case SQ_OP2_INST_CUBE
:
357 case SQ_OP2_INST_MOV
:
358 case SQ_OP2_INST_FRACT
:
359 case SQ_OP2_INST_FLOOR
:
360 case SQ_OP2_INST_EXP_IEEE
:
361 case SQ_OP2_INST_LOG_CLAMPED
:
362 case SQ_OP2_INST_LOG_IEEE
:
363 case SQ_OP2_INST_RECIP_IEEE
:
364 case SQ_OP2_INST_RECIPSQRT_IEEE
:
365 case SQ_OP2_INST_FLT_TO_INT
:
366 case SQ_OP2_INST_SIN
:
367 case SQ_OP2_INST_COS
:
370 default: radeon_error(
371 "Need instruction operand number for %x.\n", pAsm
->D
.dst
.opcode
);
377 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
)
381 Init_R700_Shader(pShader
);
382 pAsm
->pR700Shader
= pShader
;
383 pAsm
->currentShaderType
= spt
;
385 pAsm
->cf_last_export_ptr
= NULL
;
387 pAsm
->cf_current_export_clause_ptr
= NULL
;
388 pAsm
->cf_current_alu_clause_ptr
= NULL
;
389 pAsm
->cf_current_tex_clause_ptr
= NULL
;
390 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
391 pAsm
->cf_current_cf_clause_ptr
= NULL
;
393 // No clause has been created yet
394 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
396 pAsm
->number_of_colorandz_exports
= 0;
397 pAsm
->number_of_exports
= 0;
398 pAsm
->number_of_export_opcodes
= 0;
406 pAsm
->uLastPosUpdate
= 0;
408 *(BITS
*) &pAsm
->fp_stOutFmt0
= 0;
412 pAsm
->number_used_registers
= 0;
413 pAsm
->uUsedConsts
= 256;
417 pAsm
->uBoolConsts
= 0;
418 pAsm
->uIntConsts
= 0;
423 pAsm
->fc_stack
[0].type
= FC_NONE
;
425 pAsm
->branch_depth
= 0;
426 pAsm
->max_branch_depth
= 0;
431 pAsm
->aArgSubst
[3] = (-1);
435 for (i
=0; i
<NUMBER_OF_OUTPUT_COLORS
; i
++)
437 pAsm
->color_export_register_number
[i
] = (-1);
441 pAsm
->depth_export_register_number
= (-1);
442 pAsm
->stencil_export_register_number
= (-1);
443 pAsm
->coverage_to_mask_export_register_number
= (-1);
444 pAsm
->mask_export_register_number
= (-1);
446 pAsm
->starting_export_register_number
= 0;
447 pAsm
->starting_vfetch_register_number
= 0;
448 pAsm
->starting_temp_register_number
= 0;
449 pAsm
->uFirstHelpReg
= 0;
452 pAsm
->input_position_is_used
= GL_FALSE
;
453 pAsm
->input_normal_is_used
= GL_FALSE
;
456 for (i
=0; i
<NUMBER_OF_INPUT_COLORS
; i
++)
458 pAsm
->input_color_is_used
[ i
] = GL_FALSE
;
461 for (i
=0; i
<NUMBER_OF_TEXTURE_UNITS
; i
++)
463 pAsm
->input_texture_unit_is_used
[ i
] = GL_FALSE
;
466 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
468 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
471 pAsm
->number_of_inputs
= 0;
473 pAsm
->is_tex
= GL_FALSE
;
474 pAsm
->need_tex_barrier
= GL_FALSE
;
479 GLboolean
IsTex(gl_inst_opcode Opcode
)
481 if( (OPCODE_TEX
==Opcode
) || (OPCODE_TXP
==Opcode
) || (OPCODE_TXB
==Opcode
) )
488 GLboolean
IsAlu(gl_inst_opcode Opcode
)
490 //TODO : more for fc and ex for higher spec.
498 int check_current_clause(r700_AssemblerBase
* pAsm
,
499 CF_CLAUSE_TYPE new_clause_type
)
501 if (pAsm
->cf_current_clause_type
!= new_clause_type
)
502 { //Close last open clause
503 switch (pAsm
->cf_current_clause_type
)
506 if ( pAsm
->cf_current_alu_clause_ptr
!= NULL
)
508 pAsm
->cf_current_alu_clause_ptr
= NULL
;
512 if ( pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
514 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
518 if ( pAsm
->cf_current_tex_clause_ptr
!= NULL
)
520 pAsm
->cf_current_tex_clause_ptr
= NULL
;
523 case CF_EXPORT_CLAUSE
:
524 if ( pAsm
->cf_current_export_clause_ptr
!= NULL
)
526 pAsm
->cf_current_export_clause_ptr
= NULL
;
529 case CF_OTHER_CLAUSE
:
530 if ( pAsm
->cf_current_cf_clause_ptr
!= NULL
)
532 pAsm
->cf_current_cf_clause_ptr
= NULL
;
535 case CF_EMPTY_CLAUSE
:
539 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
543 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
546 switch (new_clause_type
)
549 pAsm
->cf_current_clause_type
= CF_ALU_CLAUSE
;
552 pAsm
->cf_current_clause_type
= CF_VTX_CLAUSE
;
555 pAsm
->cf_current_clause_type
= CF_TEX_CLAUSE
;
557 case CF_EXPORT_CLAUSE
:
559 R700ControlFlowSXClause
* pR700ControlFlowSXClause
560 = (R700ControlFlowSXClause
*) CALLOC_STRUCT(R700ControlFlowSXClause
);
562 // Add new export instruction to control flow program
563 if (pR700ControlFlowSXClause
!= 0)
565 pAsm
->cf_current_export_clause_ptr
= pR700ControlFlowSXClause
;
566 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause
);
567 AddCFInstruction( pAsm
->pR700Shader
,
568 (R700ControlFlowInstruction
*)pR700ControlFlowSXClause
);
573 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
576 pAsm
->cf_current_clause_type
= CF_EXPORT_CLAUSE
;
579 case CF_EMPTY_CLAUSE
:
581 case CF_OTHER_CLAUSE
:
582 pAsm
->cf_current_clause_type
= CF_OTHER_CLAUSE
;
586 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
594 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
595 R700VertexInstruction
* vertex_instruction_ptr
)
597 if( GL_FALSE
== check_current_clause(pAsm
, CF_VTX_CLAUSE
) )
602 if( pAsm
->cf_current_vtx_clause_ptr
== NULL
||
603 ( (pAsm
->cf_current_vtx_clause_ptr
!= NULL
) &&
604 (pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_vtx_clause_ptr
->m_ShaderInstType
)-1)
607 // Create new Vfetch control flow instruction for this new clause
608 pAsm
->cf_current_vtx_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
610 if (pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
612 Init_R700ControlFlowGenericClause(pAsm
->cf_current_vtx_clause_ptr
);
613 AddCFInstruction( pAsm
->pR700Shader
,
614 (R700ControlFlowInstruction
*)pAsm
->cf_current_vtx_clause_ptr
);
618 radeon_error("Could not allocate a new VFetch CF instruction.\n");
622 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
623 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
624 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
625 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
= 0x0;
626 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
627 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
628 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_VTX
;
629 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
630 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
632 LinkVertexInstruction(pAsm
->cf_current_vtx_clause_ptr
, vertex_instruction_ptr
);
636 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
++;
639 AddVTXInstruction(pAsm
->pR700Shader
, vertex_instruction_ptr
);
644 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
645 R700TextureInstruction
* tex_instruction_ptr
)
647 if ( GL_FALSE
== check_current_clause(pAsm
, CF_TEX_CLAUSE
) )
652 if ( pAsm
->cf_current_tex_clause_ptr
== NULL
||
653 ( (pAsm
->cf_current_tex_clause_ptr
!= NULL
) &&
654 (pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_tex_clause_ptr
->m_ShaderInstType
)-1)
657 // new tex cf instruction for this new clause
658 pAsm
->cf_current_tex_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
660 if (pAsm
->cf_current_tex_clause_ptr
!= NULL
)
662 Init_R700ControlFlowGenericClause(pAsm
->cf_current_tex_clause_ptr
);
663 AddCFInstruction( pAsm
->pR700Shader
,
664 (R700ControlFlowInstruction
*)pAsm
->cf_current_tex_clause_ptr
);
668 radeon_error("Could not allocate a new TEX CF instruction.\n");
672 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
673 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
674 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
676 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
677 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
678 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_TEX
;
679 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
680 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x0; //0x1;
684 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
++;
687 // If this clause contains any TEX instruction that is dependent on a previous instruction,
688 // set the barrier bit
689 if( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) || pAsm
->need_tex_barrier
== GL_TRUE
)
691 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
694 if(NULL
== pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
)
696 pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
= tex_instruction_ptr
;
697 tex_instruction_ptr
->m_pLinkedGenericClause
= pAsm
->cf_current_tex_clause_ptr
;
700 AddTEXInstruction(pAsm
->pR700Shader
, tex_instruction_ptr
);
705 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
707 GLuint destination_register
,
708 GLuint number_of_elements
,
709 GLenum dataElementType
,
710 VTX_FETCH_METHOD
* pFetchMethod
)
712 GLuint client_size_inbyte
;
714 GLuint mega_fetch_count
;
715 GLuint is_mega_fetch_flag
;
717 R700VertexGenericFetch
* vfetch_instruction_ptr
;
718 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
= pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
];
720 if (assembled_vfetch_instruction_ptr
== NULL
)
722 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
723 if (vfetch_instruction_ptr
== NULL
)
727 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
731 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
734 data_format
= GetSurfaceFormat(dataElementType
, number_of_elements
, &client_size_inbyte
);
736 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
742 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
743 is_mega_fetch_flag
= 0x1;
744 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
747 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
748 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
749 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
751 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= gl_client_id
;
752 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
753 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
754 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
755 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
757 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (number_of_elements
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
758 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (number_of_elements
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
759 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (number_of_elements
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
760 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (number_of_elements
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
762 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
764 // Destination register
765 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
766 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
768 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
769 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
771 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
773 if (assembled_vfetch_instruction_ptr
== NULL
)
775 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
780 if (pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] != NULL
)
786 pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] = vfetch_instruction_ptr
;
793 GLuint
gethelpr(r700_AssemblerBase
* pAsm
)
795 GLuint r
= pAsm
->uHelpReg
;
797 if (pAsm
->uHelpReg
> pAsm
->number_used_registers
)
799 pAsm
->number_used_registers
= pAsm
->uHelpReg
;
803 void resethelpr(r700_AssemblerBase
* pAsm
)
805 pAsm
->uHelpReg
= pAsm
->uFirstHelpReg
;
808 void checkop_init(r700_AssemblerBase
* pAsm
)
814 pAsm
->aArgSubst
[3] = -1;
817 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
)
819 GLuint tmp
= gethelpr(pAsm
);
821 //mov src to temp helper gpr.
822 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
824 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
826 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
827 pAsm
->D
.dst
.reg
= tmp
;
829 nomask_PVSDST(&(pAsm
->D
.dst
));
831 if( GL_FALSE
== assemble_src(pAsm
, src
, 0) )
836 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
837 noneg_PVSSRC(&(pAsm
->S
[0].src
));
839 if( GL_FALSE
== next_ins(pAsm
) )
844 pAsm
->aArgSubst
[1 + src
] = tmp
;
849 GLboolean
checkop1(r700_AssemblerBase
* pAsm
)
855 GLboolean
checkop2(r700_AssemblerBase
* pAsm
)
857 GLboolean bSrcConst
[2];
858 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
862 if( (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
863 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
864 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
865 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
867 bSrcConst
[0] = GL_TRUE
;
871 bSrcConst
[0] = GL_FALSE
;
873 if( (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
874 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
875 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
876 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
878 bSrcConst
[1] = GL_TRUE
;
882 bSrcConst
[1] = GL_FALSE
;
885 if( (bSrcConst
[0] == GL_TRUE
) && (bSrcConst
[1] == GL_TRUE
) )
887 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
889 if( GL_FALSE
== mov_temp(pAsm
, 1) )
899 GLboolean
checkop3(r700_AssemblerBase
* pAsm
)
901 GLboolean bSrcConst
[3];
902 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
906 if( (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
907 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
908 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
909 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
911 bSrcConst
[0] = GL_TRUE
;
915 bSrcConst
[0] = GL_FALSE
;
917 if( (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
918 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
919 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
920 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
922 bSrcConst
[1] = GL_TRUE
;
926 bSrcConst
[1] = GL_FALSE
;
928 if( (pILInst
->SrcReg
[2].File
== PROGRAM_CONSTANT
) ||
929 (pILInst
->SrcReg
[2].File
== PROGRAM_LOCAL_PARAM
) ||
930 (pILInst
->SrcReg
[2].File
== PROGRAM_ENV_PARAM
) ||
931 (pILInst
->SrcReg
[2].File
== PROGRAM_STATE_VAR
) )
933 bSrcConst
[2] = GL_TRUE
;
937 bSrcConst
[2] = GL_FALSE
;
940 if( (GL_TRUE
== bSrcConst
[0]) &&
941 (GL_TRUE
== bSrcConst
[1]) &&
942 (GL_TRUE
== bSrcConst
[2]) )
944 if( GL_FALSE
== mov_temp(pAsm
, 1) )
948 if( GL_FALSE
== mov_temp(pAsm
, 2) )
955 else if( (GL_TRUE
== bSrcConst
[0]) &&
956 (GL_TRUE
== bSrcConst
[1]) )
958 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
960 if( GL_FALSE
== mov_temp(pAsm
, 1) )
968 else if ( (GL_TRUE
== bSrcConst
[0]) &&
969 (GL_TRUE
== bSrcConst
[2]) )
971 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[2].Index
)
973 if( GL_FALSE
== mov_temp(pAsm
, 2) )
981 else if( (GL_TRUE
== bSrcConst
[1]) &&
982 (GL_TRUE
== bSrcConst
[2]) )
984 if(pILInst
->SrcReg
[1].Index
!= pILInst
->SrcReg
[2].Index
)
986 if( GL_FALSE
== mov_temp(pAsm
, 2) )
998 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
1002 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1009 if(pAsm
->aArgSubst
[1+src
] >= 0)
1011 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1012 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1013 pAsm
->S
[fld
].src
.reg
= pAsm
->aArgSubst
[1+src
];
1017 switch (pILInst
->SrcReg
[src
].File
)
1019 case PROGRAM_TEMPORARY
:
1020 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1021 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1022 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
+ pAsm
->starting_temp_register_number
;
1024 case PROGRAM_CONSTANT
:
1025 case PROGRAM_LOCAL_PARAM
:
1026 case PROGRAM_ENV_PARAM
:
1027 case PROGRAM_STATE_VAR
:
1028 if (1 == pILInst
->SrcReg
[src
].RelAddr
)
1030 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_RELATIVE_A0
);
1034 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1037 pAsm
->S
[fld
].src
.rtype
= SRC_REG_CONSTANT
;
1038 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
;
1041 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1042 pAsm
->S
[fld
].src
.rtype
= SRC_REG_INPUT
;
1043 switch (pAsm
->currentShaderType
)
1046 pAsm
->S
[fld
].src
.reg
= pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1049 pAsm
->S
[fld
].src
.reg
= pAsm
->ucVP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1054 radeon_error("Invalid source argument type\n");
1059 pAsm
->S
[fld
].src
.swizzlex
= pILInst
->SrcReg
[src
].Swizzle
& 0x7;
1060 pAsm
->S
[fld
].src
.swizzley
= (pILInst
->SrcReg
[src
].Swizzle
>> 3) & 0x7;
1061 pAsm
->S
[fld
].src
.swizzlez
= (pILInst
->SrcReg
[src
].Swizzle
>> 6) & 0x7;
1062 pAsm
->S
[fld
].src
.swizzlew
= (pILInst
->SrcReg
[src
].Swizzle
>> 9) & 0x7;
1064 pAsm
->S
[fld
].src
.negx
= pILInst
->SrcReg
[src
].Negate
& 0x1;
1065 pAsm
->S
[fld
].src
.negy
= (pILInst
->SrcReg
[src
].Negate
>> 1) & 0x1;
1066 pAsm
->S
[fld
].src
.negz
= (pILInst
->SrcReg
[src
].Negate
>> 2) & 0x1;
1067 pAsm
->S
[fld
].src
.negw
= (pILInst
->SrcReg
[src
].Negate
>> 3) & 0x1;
1072 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
)
1074 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1075 switch (pILInst
->DstReg
.File
)
1077 case PROGRAM_TEMPORARY
:
1078 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1079 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1080 pAsm
->D
.dst
.reg
= pILInst
->DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1082 case PROGRAM_ADDRESS
:
1083 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1084 pAsm
->D
.dst
.rtype
= DST_REG_A0
;
1085 pAsm
->D
.dst
.reg
= 0;
1087 case PROGRAM_OUTPUT
:
1088 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1089 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1090 switch (pAsm
->currentShaderType
)
1093 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1096 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1101 radeon_error("Invalid destination output argument type\n");
1105 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1106 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1107 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1108 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1113 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
)
1115 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1117 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
1119 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1120 pAsm
->D
.dst
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1122 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1124 else if(PROGRAM_OUTPUT
== pILInst
->DstReg
.File
)
1126 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1127 switch (pAsm
->currentShaderType
)
1130 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1133 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1137 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1141 radeon_error("Invalid destination output argument type\n");
1145 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1146 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1147 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1148 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1153 GLboolean
tex_src(r700_AssemblerBase
*pAsm
)
1155 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1157 GLboolean bValidTexCoord
= GL_FALSE
;
1159 if(pAsm
->aArgSubst
[1] >= 0)
1161 bValidTexCoord
= GL_TRUE
;
1162 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1163 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1164 pAsm
->S
[0].src
.reg
= pAsm
->aArgSubst
[1];
1168 switch (pILInst
->SrcReg
[0].File
) {
1169 case PROGRAM_CONSTANT
:
1170 case PROGRAM_LOCAL_PARAM
:
1171 case PROGRAM_ENV_PARAM
:
1172 case PROGRAM_STATE_VAR
:
1174 case PROGRAM_TEMPORARY
:
1175 bValidTexCoord
= GL_TRUE
;
1176 pAsm
->S
[0].src
.reg
= pILInst
->SrcReg
[0].Index
+
1177 pAsm
->starting_temp_register_number
;
1178 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1181 switch (pILInst
->SrcReg
[0].Index
)
1183 case FRAG_ATTRIB_WPOS
:
1184 case FRAG_ATTRIB_COL0
:
1185 case FRAG_ATTRIB_COL1
:
1186 case FRAG_ATTRIB_FOGC
:
1187 case FRAG_ATTRIB_TEX0
:
1188 case FRAG_ATTRIB_TEX1
:
1189 case FRAG_ATTRIB_TEX2
:
1190 case FRAG_ATTRIB_TEX3
:
1191 case FRAG_ATTRIB_TEX4
:
1192 case FRAG_ATTRIB_TEX5
:
1193 case FRAG_ATTRIB_TEX6
:
1194 case FRAG_ATTRIB_TEX7
:
1195 bValidTexCoord
= GL_TRUE
;
1196 pAsm
->S
[0].src
.reg
=
1197 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1198 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
1200 case FRAG_ATTRIB_FACE
:
1201 fprintf(stderr
, "FRAG_ATTRIB_FACE unsupported\n");
1203 case FRAG_ATTRIB_PNTC
:
1204 fprintf(stderr
, "FRAG_ATTRIB_PNTC unsupported\n");
1206 case FRAG_ATTRIB_VAR0
:
1207 fprintf(stderr
, "FRAG_ATTRIB_VAR0 unsupported\n");
1214 if(GL_TRUE
== bValidTexCoord
)
1216 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1220 radeon_error("Invalid source texcoord for TEX instruction\n");
1224 pAsm
->S
[0].src
.swizzlex
= pILInst
->SrcReg
[0].Swizzle
& 0x7;
1225 pAsm
->S
[0].src
.swizzley
= (pILInst
->SrcReg
[0].Swizzle
>> 3) & 0x7;
1226 pAsm
->S
[0].src
.swizzlez
= (pILInst
->SrcReg
[0].Swizzle
>> 6) & 0x7;
1227 pAsm
->S
[0].src
.swizzlew
= (pILInst
->SrcReg
[0].Swizzle
>> 9) & 0x7;
1229 pAsm
->S
[0].src
.negx
= pILInst
->SrcReg
[0].Negate
& 0x1;
1230 pAsm
->S
[0].src
.negy
= (pILInst
->SrcReg
[0].Negate
>> 1) & 0x1;
1231 pAsm
->S
[0].src
.negz
= (pILInst
->SrcReg
[0].Negate
>> 2) & 0x1;
1232 pAsm
->S
[0].src
.negw
= (pILInst
->SrcReg
[0].Negate
>> 3) & 0x1;
1237 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
)
1239 PVSSRC
* texture_coordinate_source
;
1240 PVSSRC
* texture_unit_source
;
1242 R700TextureInstruction
* tex_instruction_ptr
= (R700TextureInstruction
*) CALLOC_STRUCT(R700TextureInstruction
);
1243 if (tex_instruction_ptr
== NULL
)
1247 Init_R700TextureInstruction(tex_instruction_ptr
);
1249 texture_coordinate_source
= &(pAsm
->S
[0].src
);
1250 texture_unit_source
= &(pAsm
->S
[1].src
);
1252 tex_instruction_ptr
->m_Word0
.f
.tex_inst
= pAsm
->D
.dst
.opcode
;
1253 tex_instruction_ptr
->m_Word0
.f
.bc_frac_mode
= 0x0;
1254 tex_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
1256 tex_instruction_ptr
->m_Word0
.f
.resource_id
= texture_unit_source
->reg
;
1258 tex_instruction_ptr
->m_Word1
.f
.lod_bias
= 0x0;
1260 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_NORMALIZED
;
1261 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_NORMALIZED
;
1262 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_NORMALIZED
;
1263 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_NORMALIZED
;
1265 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1266 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_UNNORMALIZED
;
1267 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_UNNORMALIZED
;
1268 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_UNNORMALIZED
;
1269 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_UNNORMALIZED
;
1272 tex_instruction_ptr
->m_Word2
.f
.offset_x
= 0x0;
1273 tex_instruction_ptr
->m_Word2
.f
.offset_y
= 0x0;
1274 tex_instruction_ptr
->m_Word2
.f
.offset_z
= 0x0;
1276 tex_instruction_ptr
->m_Word2
.f
.sampler_id
= texture_unit_source
->reg
;
1279 if ( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
1280 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
1282 tex_instruction_ptr
->m_Word0
.f
.src_gpr
= texture_coordinate_source
->reg
;
1283 tex_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
1285 tex_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
1286 tex_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
;
1288 tex_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (pAsm
->D
.dst
.writex
? texture_unit_source
->swizzlex
: SQ_SEL_MASK
);
1289 tex_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (pAsm
->D
.dst
.writey
? texture_unit_source
->swizzley
: SQ_SEL_MASK
);
1290 tex_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (pAsm
->D
.dst
.writez
? texture_unit_source
->swizzlez
: SQ_SEL_MASK
);
1291 tex_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (pAsm
->D
.dst
.writew
? texture_unit_source
->swizzlew
: SQ_SEL_MASK
);
1294 tex_instruction_ptr
->m_Word2
.f
.src_sel_x
= texture_coordinate_source
->swizzlex
;
1295 tex_instruction_ptr
->m_Word2
.f
.src_sel_y
= texture_coordinate_source
->swizzley
;
1296 tex_instruction_ptr
->m_Word2
.f
.src_sel_z
= texture_coordinate_source
->swizzlez
;
1297 tex_instruction_ptr
->m_Word2
.f
.src_sel_w
= texture_coordinate_source
->swizzlew
;
1301 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1305 if( GL_FALSE
== add_tex_instruction(pAsm
, tex_instruction_ptr
) )
1313 void initialize(r700_AssemblerBase
*pAsm
)
1315 GLuint cycle
, component
;
1317 for (cycle
=0; cycle
<NUMBER_OF_CYCLES
; cycle
++)
1319 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1321 pAsm
->hw_gpr
[cycle
][component
] = (-1);
1324 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1326 pAsm
->hw_cfile_addr
[component
] = (-1);
1327 pAsm
->hw_cfile_chan
[component
] = (-1);
/*
 * Encode one source operand of an ALU instruction.
 *
 * The swizzle of pSource for the channel given by scalar_channel_index is
 * looked up, and from it and the operand's register type, address mode and
 * negate flags the four values src_sel, src_rel, src_chan and src_neg are
 * derived.  They are then stored into the src0 or src1 fields of m_Word0 or
 * the src2 fields of m_Word1_OP3 of alu_instruction_ptr, selected by
 * source_index.
 *
 * radeon_error() is called for a register type other than TEMP, INPUT or
 * CONSTANT, for an unknown swizzle value, and for a source_index beyond the
 * three supported sources.
 */
1331 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
1334 BITS scalar_channel_index
)
1341 //--------------------------------------------------------------------------
1342 // Source for operands src0, src1.
1343 // Values [0,127] correspond to GPR[0..127].
1344 // Values [256,511] correspond to cfile constants c[0..255].
1346 //--------------------------------------------------------------------------
1347 // Other special values are shown in the list below.
1349 // 248 SQ_ALU_SRC_0: special constant 0.0.
1350 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1352 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1353 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1355 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1356 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1358 // 254 SQ_ALU_SRC_PV: previous vector result.
1359 // 255 SQ_ALU_SRC_PS: previous scalar result.
1360 //--------------------------------------------------------------------------
/* Swizzle of the operand for the channel being assembled; any channel
   index other than 0..3 yields SQ_SEL_MASK. */
1362 BITS channel_swizzle
;
1363 switch (scalar_channel_index
)
1365 case 0: channel_swizzle
= pSource
->swizzlex
; break;
1366 case 1: channel_swizzle
= pSource
->swizzley
; break;
1367 case 2: channel_swizzle
= pSource
->swizzlez
; break;
1368 case 3: channel_swizzle
= pSource
->swizzlew
; break;
1369 default: channel_swizzle
= SQ_SEL_MASK
; break;
/* SQ_SEL_0 and SQ_SEL_1 select the inline constants SQ_ALU_SRC_0 and
   SQ_ALU_SRC_1; otherwise the select is the register number, offset by
   CFILE_REGISTER_OFFSET for constant registers. */
1372 if(channel_swizzle
== SQ_SEL_0
)
1374 src_sel
= SQ_ALU_SRC_0
;
1376 else if (channel_swizzle
== SQ_SEL_1
)
1378 src_sel
= SQ_ALU_SRC_1
;
1382 if ( (pSource
->rtype
== SRC_REG_TEMPORARY
) ||
1383 (pSource
->rtype
== SRC_REG_INPUT
)
1386 src_sel
= pSource
->reg
;
1388 else if (pSource
->rtype
== SRC_REG_CONSTANT
)
1390 src_sel
= pSource
->reg
+ CFILE_REGISTER_OFFSET
;
1394 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1395 source_index
, pSource
->rtype
);
/* Absolute or relative addressing follows the operand's address mode. */
1400 if( ADDR_ABSOLUTE
== addrmode_PVSSRC(pSource
) )
1402 src_rel
= SQ_ABSOLUTE
;
1406 src_rel
= SQ_RELATIVE
;
/* Channel to read, chosen from the swizzle value. */
1409 switch (channel_swizzle
)
1412 src_chan
= SQ_CHAN_X
;
1415 src_chan
= SQ_CHAN_Y
;
1418 src_chan
= SQ_CHAN_Z
;
1421 src_chan
= SQ_CHAN_W
;
1425 // Does not matter since src_sel controls
1426 src_chan
= SQ_CHAN_X
;
1429 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle
);
/* Negate flag of the same channel; 0 for an out-of-range channel index. */
1434 switch (scalar_channel_index
)
1436 case 0: src_neg
= pSource
->negx
; break;
1437 case 1: src_neg
= pSource
->negy
; break;
1438 case 2: src_neg
= pSource
->negz
; break;
1439 case 3: src_neg
= pSource
->negw
; break;
1440 default: src_neg
= 0; break;
/* Store the four values into the operand slot chosen by source_index. */
1443 switch (source_index
)
1446 alu_instruction_ptr
->m_Word0
.f
.src0_sel
= src_sel
;
1447 alu_instruction_ptr
->m_Word0
.f
.src0_rel
= src_rel
;
1448 alu_instruction_ptr
->m_Word0
.f
.src0_chan
= src_chan
;
1449 alu_instruction_ptr
->m_Word0
.f
.src0_neg
= src_neg
;
1452 alu_instruction_ptr
->m_Word0
.f
.src1_sel
= src_sel
;
1453 alu_instruction_ptr
->m_Word0
.f
.src1_rel
= src_rel
;
1454 alu_instruction_ptr
->m_Word0
.f
.src1_chan
= src_chan
;
1455 alu_instruction_ptr
->m_Word0
.f
.src1_neg
= src_neg
;
1458 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
= src_sel
;
1459 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
= src_rel
;
1460 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
= src_chan
;
1461 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
= src_neg
;
1464 radeon_error("Only three sources allowed in ALU opcodes.\n");
1472 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
1473 R700ALUInstruction
* alu_instruction_ptr
,
1474 GLuint contiguous_slots_needed
)
1476 if( GL_FALSE
== check_current_clause(pAsm
, CF_ALU_CLAUSE
) )
1481 if ( pAsm
->cf_current_alu_clause_ptr
== NULL
||
1482 ( (pAsm
->cf_current_alu_clause_ptr
!= NULL
) &&
1483 (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-contiguous_slots_needed
-1) )
1487 //new cf inst for this clause
1488 pAsm
->cf_current_alu_clause_ptr
= (R700ControlFlowALUClause
*) CALLOC_STRUCT(R700ControlFlowALUClause
);
1490 // link the new cf to cf segment
1491 if(NULL
!= pAsm
->cf_current_alu_clause_ptr
)
1493 Init_R700ControlFlowALUClause(pAsm
->cf_current_alu_clause_ptr
);
1494 AddCFInstruction( pAsm
->pR700Shader
,
1495 (R700ControlFlowInstruction
*)pAsm
->cf_current_alu_clause_ptr
);
1499 radeon_error("Could not allocate a new ALU CF instruction.\n");
1503 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank0
= 0x0;
1504 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank1
= 0x0;
1505 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_mode0
= SQ_CF_KCACHE_NOP
;
1507 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_mode1
= SQ_CF_KCACHE_NOP
;
1508 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr0
= 0x0;
1509 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr1
= 0x0;
1511 //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1;
1512 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
= 0x0;
1513 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ALU
;
1515 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
1517 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
1521 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
++;
1524 // If this clause constains any instruction that is forward dependent on a TEX instruction,
1525 // set the whole_quad_mode for this clause
1526 if ( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) )
1528 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x1;
1531 if (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-1) )
1533 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
1536 if(NULL
== pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
)
1538 pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
= alu_instruction_ptr
;
1539 alu_instruction_ptr
->m_pLinkedALUClause
= pAsm
->cf_current_alu_clause_ptr
;
1542 AddALUInstruction(pAsm
->pR700Shader
, alu_instruction_ptr
);
/*
 * Read back the encoded fields of one source operand.
 *
 * Depending on source_index, the sel / rel / chan / neg fields of src0 or
 * src1 (in m_Word0) or of src2 (in m_Word1_OP3) of alu_instruction_ptr are
 * copied to *psrc_sel, *psrc_rel, *psrc_chan and *psrc_neg.
 */
1547 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
1554 switch (source_index
)
/* src0 fields */
1557 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src0_sel
;
1558 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src0_rel
;
1559 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src0_chan
;
1560 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src0_neg
;
/* src1 fields */
1564 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src1_sel
;
1565 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src1_rel
;
1566 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src1_chan
;
1567 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src1_neg
;
/* src2 fields live in the OP3 view of word 1 */
1571 *psrc_sel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
;
1572 *psrc_rel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
;
1573 *psrc_chan
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
;
1574 *psrc_neg
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
;
1579 int is_cfile(BITS sel
)
1581 if (sel
> 255 && sel
< 512)
/*
 * Classify a source-select value as a constant operand.
 *
 * The range test below accepts the special values SQ_ALU_SRC_0 through
 * SQ_ALU_SRC_LITERAL (inline and literal constants).
 */
1588 int is_const(BITS sel
)
1594 else if(sel
>= SQ_ALU_SRC_0
&& sel
<= SQ_ALU_SRC_LITERAL
)
1601 int is_gpr(BITS sel
)
1603 if (sel
>= 0 && sel
< 128)
1610 const GLuint BANK_SWIZZLE_VEC
[8] = {SQ_ALU_VEC_210
, //000
1611 SQ_ALU_VEC_120
, //001
1612 SQ_ALU_VEC_102
, //010
1614 SQ_ALU_VEC_201
, //011
1615 SQ_ALU_VEC_012
, //100
1616 SQ_ALU_VEC_021
, //101
1618 SQ_ALU_VEC_012
, //110
1619 SQ_ALU_VEC_012
}; //111
1621 const GLuint BANK_SWIZZLE_SCL
[8] = {SQ_ALU_SCL_210
, //000
1622 SQ_ALU_SCL_122
, //001
1623 SQ_ALU_SCL_122
, //010
1625 SQ_ALU_SCL_221
, //011
1626 SQ_ALU_SCL_212
, //100
1627 SQ_ALU_SCL_122
, //101
1629 SQ_ALU_SCL_122
, //110
1630 SQ_ALU_SCL_122
}; //111
1632 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
1636 int res_match
= (-1);
1637 int res_empty
= (-1);
1641 for (res
=3; res
>=0; res
--)
1643 if(pAsm
->hw_cfile_addr
[ res
] < 0)
1647 else if( (pAsm
->hw_cfile_addr
[res
] == (int)sel
)
1649 (pAsm
->hw_cfile_chan
[ res
] == (int) chan
) )
1657 // Read for this scalar component already reserved, nothing to do here.
1660 else if(res_empty
>= 0)
1662 pAsm
->hw_cfile_addr
[ res_empty
] = sel
;
1663 pAsm
->hw_cfile_chan
[ res_empty
] = chan
;
1667 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1673 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
)
1675 if(pAsm
->hw_gpr
[cycle
][chan
] < 0)
1677 pAsm
->hw_gpr
[cycle
][chan
] = sel
;
1679 else if(pAsm
->hw_gpr
[cycle
][chan
] != (int)sel
)
1681 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1688 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
1692 case SQ_ALU_SCL_210
:
1694 int table
[3] = {2, 1, 0};
1695 *pCycle
= table
[sel
];
1699 case SQ_ALU_SCL_122
:
1701 int table
[3] = {1, 2, 2};
1702 *pCycle
= table
[sel
];
1706 case SQ_ALU_SCL_212
:
1708 int table
[3] = {2, 1, 2};
1709 *pCycle
= table
[sel
];
1713 case SQ_ALU_SCL_221
:
1715 int table
[3] = {2, 2, 1};
1716 *pCycle
= table
[sel
];
1721 radeon_error("Bad Scalar bank swizzle value\n");
1728 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
1732 case SQ_ALU_VEC_012
:
1734 int table
[3] = {0, 1, 2};
1735 *pCycle
= table
[sel
];
1738 case SQ_ALU_VEC_021
:
1740 int table
[3] = {0, 2, 1};
1741 *pCycle
= table
[sel
];
1744 case SQ_ALU_VEC_120
:
1746 int table
[3] = {1, 2, 0};
1747 *pCycle
= table
[sel
];
1750 case SQ_ALU_VEC_102
:
1752 int table
[3] = {1, 0, 2};
1753 *pCycle
= table
[sel
];
1756 case SQ_ALU_VEC_201
:
1758 int table
[3] = {2, 0, 1};
1759 *pCycle
= table
[sel
];
1762 case SQ_ALU_VEC_210
:
1764 int table
[3] = {2, 1, 0};
1765 *pCycle
= table
[sel
];
1769 radeon_error("Bad Vec bank swizzle value\n");
/*
 * Read-port check for an instruction issued as a single scalar operation.
 *
 * The operand fields of alu_instruction_ptr are gathered with
 * get_src_properties(), a bank swizzle is chosen from BANK_SWIZZLE_SCL and
 * written into the instruction, constant-file operands are passed to
 * reserve_cfile(), and operands are mapped to cycles with
 * cycle_for_scalar_bank_swizzle() and passed to reserve_gpr().
 */
1777 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
1778 R700ALUInstruction
* alu_instruction_ptr
)
1781 GLuint bank_swizzle
;
1782 GLuint const_count
= 0;
1791 BITS src_sel
[3] = {0,0,0};
1792 BITS src_chan
[3] = {0,0,0};
1793 BITS src_rel
[3] = {0,0,0};
1794 BITS src_neg
[3] = {0,0,0};
1798 GLuint number_of_operands
= r700GetNumOperands(pAsm
);
/* Collect the encoded fields of every operand. */
1800 for (src
=0; src
<number_of_operands
; src
++)
1802 get_src_properties(alu_instruction_ptr
,
/* 3-bit key: 4 when src0 is a constant, 2 for src1, 1 for src2. */
1811 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
1812 (is_const( src_sel
[1] ) ? 2 : 0) +
1813 (is_const( src_sel
[2] ) ? 1 : 0) );
1815 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_SCL
[ swizzle_key
];
/* First pass over the operands: constants. */
1817 for (src
=0; src
<number_of_operands
; src
++)
1819 sel
= src_sel
[src
];
1820 chan
= src_chan
[src
];
1821 rel
= src_rel
[src
];
1822 neg
= src_neg
[src
];
1824 if (is_const( sel
))
1826 // Any constant, including literal and inline constants
/* NOTE(review): the result of reserve_cfile() is not checked here,
   whereas check_vector() tests it against GL_FALSE - confirm this is
   intended. */
1829 if (is_cfile( sel
))
1831 reserve_cfile(pAsm
, sel
, chan
);
/* Second pass over the operands: cycle lookup and GPR port reservation. */
1837 for (src
=0; src
<number_of_operands
; src
++)
1839 sel
= src_sel
[src
];
1840 chan
= src_chan
[src
];
1841 rel
= src_rel
[src
];
1842 neg
= src_neg
[src
];
1846 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
1848 if( GL_FALSE
== cycle_for_scalar_bank_swizzle(bank_swizzle
, src
, &cycle
) )
/* The GPR port is reserved only when cycle < const_count. */
1853 if(cycle
< const_count
)
1855 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
/*
 * Read-port check for one instruction of a vector group.
 *
 * The operand fields of alu_instruction_ptr are gathered with
 * get_src_properties(), a bank swizzle is chosen from BANK_SWIZZLE_VEC and
 * written into the instruction, and each operand is then passed to
 * reserve_gpr() (after a cycle lookup with cycle_for_vector_bank_swizzle())
 * or, for constants, to reserve_cfile().  Both reserve calls are tested
 * against GL_FALSE.
 */
1866 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
1867 R700ALUInstruction
* alu_instruction_ptr
)
1870 GLuint bank_swizzle
;
1871 GLuint const_count
= 0;
1880 BITS src_sel
[3] = {0,0,0};
1881 BITS src_chan
[3] = {0,0,0};
1882 BITS src_rel
[3] = {0,0,0};
1883 BITS src_neg
[3] = {0,0,0};
1887 GLuint number_of_operands
= r700GetNumOperands(pAsm
);
/* Collect the encoded fields of every operand. */
1889 for (src
=0; src
<number_of_operands
; src
++)
1891 get_src_properties(alu_instruction_ptr
,
/* 3-bit key: 4 when src0 is a constant, 2 for src1, 1 for src2. */
1900 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
1901 (is_const( src_sel
[1] ) ? 2 : 0) +
1902 (is_const( src_sel
[2] ) ? 1 : 0)
1905 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_VEC
[swizzle_key
];
/* Per-operand port reservation. */
1907 for (src
=0; src
<number_of_operands
; src
++)
1909 sel
= src_sel
[src
];
1910 chan
= src_chan
[src
];
1911 rel
= src_rel
[src
];
1912 neg
= src_neg
[src
];
1915 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
1919 if( GL_FALSE
== cycle_for_vector_bank_swizzle(bank_swizzle
, src
, &cycle
) )
/* Compares this operand's register and channel with those of src0.
   NOTE(review): presumably lets an operand that repeats src0 share its
   read port - confirm. */
1925 (sel
== src_sel
[0]) &&
1926 (chan
== src_chan
[0]) )
1931 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
1937 else if( is_const(sel
) )
1943 if( GL_FALSE
== reserve_cfile(pAsm
, sel
, chan
) )
/*
 * Emit the ALU instruction(s) for the operation currently described by
 * pAsm->D (destination and opcode) and pAsm->S[] (sources).
 *
 * One R700ALUInstruction is allocated per scalar operation.  For each, the
 * sources are encoded with assemble_alu_src(), the destination register,
 * channel, clamp and write mask are filled in, the instruction is queued
 * with add_alu_instruction() and its read ports are checked with
 * check_scalar() or check_vector().
 */
1954 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
)
1956 GLuint number_of_scalar_operations
;
1957 GLboolean is_single_scalar_operation
;
1958 GLuint scalar_channel_index
;
1960 PVSSRC
* pcurrent_source
;
1961 int current_source_index
;
1962 GLuint contiguous_slots_needed
;
1964 GLuint uNumSrc
= r700GetNumOperands(pAsm
);
1965 GLuint channel_swizzle
, j
;
1966 GLuint chan_counter
[4] = {0, 0, 0, 0};
1967 PVSSRC
* pSource
[3];
1968 GLboolean bSplitInst
= GL_FALSE
;
/* D.dst.math == 1 marks a single scalar operation; the two assignment
   pairs below select one or four scalar operations. */
1970 if (1 == pAsm
->D
.dst
.math
)
1972 is_single_scalar_operation
= GL_TRUE
;
1973 number_of_scalar_operations
= 1;
1977 is_single_scalar_operation
= GL_FALSE
;
1978 number_of_scalar_operations
= 4;
1980 /* current assembler doesn't do more than 1 register per source */
1982 /* check read port, only very preliminary algorithm, not count in
1983 src0/1 same comp case and prev slot repeat case; also not count relative
1984 addressing. TODO: improve performance. */
/* Counts, per swizzle channel, the TEMPORARY/INPUT operand reads of the
   whole instruction; more than 3 on any channel sets bSplitInst.
   NOTE(review): confirm that this pass is compiled into the build. */
1985 for(j
=0; j
<uNumSrc
; j
++)
1987 pSource
[j
] = &(pAsm
->S
[j
].src
);
1989 for(scalar_channel_index
=0; scalar_channel_index
<4; scalar_channel_index
++)
1991 for(j
=0; j
<uNumSrc
; j
++)
1993 switch (scalar_channel_index
)
1995 case 0: channel_swizzle
= pSource
[j
]->swizzlex
; break;
1996 case 1: channel_swizzle
= pSource
[j
]->swizzley
; break;
1997 case 2: channel_swizzle
= pSource
[j
]->swizzlez
; break;
1998 case 3: channel_swizzle
= pSource
[j
]->swizzlew
; break;
1999 default: channel_swizzle
= SQ_SEL_MASK
; break;
2001 if ( ((pSource
[j
]->rtype
== SRC_REG_TEMPORARY
) ||
2002 (pSource
[j
]->rtype
== SRC_REG_INPUT
))
2003 && (channel_swizzle
<= SQ_SEL_W
) )
2005 chan_counter
[channel_swizzle
]++;
2009 if( (chan_counter
[SQ_SEL_X
] > 3)
2010 || (chan_counter
[SQ_SEL_Y
] > 3)
2011 || (chan_counter
[SQ_SEL_Z
] > 3)
2012 || (chan_counter
[SQ_SEL_W
] > 3) ) /* each chan bank has only 3 ports. */
2014 bSplitInst
= GL_TRUE
;
/* Reduction opcodes ask add_alu_instruction() for 4 contiguous slots. */
2019 contiguous_slots_needed
= 0;
2021 if(GL_TRUE
== is_reduction_opcode(&(pAsm
->D
)) )
2023 contiguous_slots_needed
= 4;
/* One instruction is allocated and filled per scalar operation. */
2028 for (scalar_channel_index
=0;
2029 scalar_channel_index
< number_of_scalar_operations
;
2030 scalar_channel_index
++)
2032 R700ALUInstruction
* alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2033 if (alu_instruction_ptr
== NULL
)
2037 Init_R700ALUInstruction(alu_instruction_ptr
);
/* Source 0 */
2040 current_source_index
= 0;
2041 pcurrent_source
= &(pAsm
->S
[0].src
);
2043 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2044 current_source_index
,
2046 scalar_channel_index
) )
/* Source 1 */
2054 current_source_index
= 1;
2055 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2057 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2058 current_source_index
,
2060 scalar_channel_index
) )
2067 alu_instruction_ptr
->m_Word0
.f
.index_mode
= SQ_INDEX_LOOP
;
/* 'last' is set on every instruction of a single scalar operation or a
   split instruction, otherwise only on channel 3. */
2069 if( (is_single_scalar_operation
== GL_TRUE
)
2070 || (GL_TRUE
== bSplitInst
) )
2072 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
2076 alu_instruction_ptr
->m_Word0
.f
.last
= (scalar_channel_index
== 3) ? 1 : 0;
2079 alu_instruction_ptr
->m_Word0
.f
.pred_sel
= 0x0;
2080 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x0;
2081 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x0;
/* Destination register: only TEMPORARY and OUT register types are
   accepted. */
2084 if( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
2085 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
2087 alu_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
2091 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2095 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
; //D.rtype
2097 if ( is_single_scalar_operation
== GL_TRUE
)
2099 // Override scalar_channel_index since only one scalar value will be written
2100 if(pAsm
->D
.dst
.writex
)
2102 scalar_channel_index
= 0;
2104 else if(pAsm
->D
.dst
.writey
)
2106 scalar_channel_index
= 1;
2108 else if(pAsm
->D
.dst
.writez
)
2110 scalar_channel_index
= 2;
2112 else if(pAsm
->D
.dst
.writew
)
2114 scalar_channel_index
= 3;
2118 alu_instruction_ptr
->m_Word1
.f
.dst_chan
= scalar_channel_index
;
2120 alu_instruction_ptr
->m_Word1
.f
.clamp
= pAsm
->pILInst
[pAsm
->uiCurInst
].SaturateMode
;
/* OP3 encoding: opcode goes into m_Word1_OP3 and a third source is
   encoded.  The OP2 encodings below (field views f6 and f) additionally
   set abs flags, write_mask from the destination write flags, and omod. */
2122 if (pAsm
->D
.dst
.op3
)
2126 alu_instruction_ptr
->m_Word1_OP3
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2128 //There's 3rd src for op3
2129 current_source_index
= 2;
2130 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2132 if ( GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2133 current_source_index
,
2135 scalar_channel_index
) )
2145 alu_instruction_ptr
->m_Word1_OP2
.f6
.alu_inst
= pAsm
->D
.dst
.opcode
;
2147 alu_instruction_ptr
->m_Word1_OP2
.f6
.src0_abs
= 0x0;
2148 alu_instruction_ptr
->m_Word1_OP2
.f6
.src1_abs
= 0x0;
2150 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2151 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2152 switch (scalar_channel_index
)
2155 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writex
;
2158 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writey
;
2161 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writez
;
2164 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writew
;
2167 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= 1; //SQ_SEL_MASK;
2170 alu_instruction_ptr
->m_Word1_OP2
.f6
.omod
= SQ_ALU_OMOD_OFF
;
2174 alu_instruction_ptr
->m_Word1_OP2
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2176 alu_instruction_ptr
->m_Word1_OP2
.f
.src0_abs
= 0x0;
2177 alu_instruction_ptr
->m_Word1_OP2
.f
.src1_abs
= 0x0;
2179 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2180 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2181 switch (scalar_channel_index
)
2184 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writex
;
2187 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writey
;
2190 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writez
;
2193 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writew
;
2196 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= 1; //SQ_SEL_MASK;
2199 alu_instruction_ptr
->m_Word1_OP2
.f
.omod
= SQ_ALU_OMOD_OFF
;
/* Queue the instruction, then check its read ports. */
2203 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, contiguous_slots_needed
) )
2209 * Judge the type of current instruction, is it vector or scalar
2212 if (is_single_scalar_operation
)
2214 if(GL_FALSE
== check_scalar(pAsm
, alu_instruction_ptr
) )
2221 if(GL_FALSE
== check_vector(pAsm
, alu_instruction_ptr
) )
/* Contiguous slots are only requested for the first instruction. */
2227 contiguous_slots_needed
= 0;
/*
 * Assemble the instruction currently described by pAsm->D and pAsm->S[]
 * and reset the per-instruction state.
 *
 * When pAsm->is_tex is GL_TRUE, assemble_tex_instruction() is called, with
 * GL_FALSE as second argument for a TEXTURE_RECT_INDEX target and GL_TRUE
 * otherwise; the remaining call is assemble_alu_instruction().  For a
 * DST_REG_OUT destination the export mask in pAsm->pucOutMask is updated.
 * Finally S[0..2].bits, is_tex and need_tex_barrier are cleared.
 */
2233 GLboolean
next_ins(r700_AssemblerBase
*pAsm
)
2235 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
2237 if( GL_TRUE
== pAsm
->is_tex
)
2239 if (pILInst
->TexSrcTarget
== TEXTURE_RECT_INDEX
) {
2240 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_FALSE
) )
2242 radeon_error("Error assembling TEX instruction\n");
2246 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_TRUE
) )
2248 radeon_error("Error assembling TEX instruction\n");
2255 if( GL_FALSE
== assemble_alu_instruction(pAsm
) )
2257 radeon_error("Error assembling ALU instruction\n");
/* Record which channels of the export register were written.  The mask
   entry is indexed by the destination register relative to
   starting_export_register_number. */
2262 if(pAsm
->D
.dst
.rtype
== DST_REG_OUT
)
2266 // There is no mask for OP3 instructions, so all channels are written
2267 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
] = 0xF;
2271 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
]
2272 |= (unsigned char)pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
;
2276 //reset for next inst.
2278 pAsm
->S
[0].bits
= 0;
2279 pAsm
->S
[1].bits
= 0;
2280 pAsm
->S
[2].bits
= 0;
2281 pAsm
->is_tex
= GL_FALSE
;
2282 pAsm
->need_tex_barrier
= GL_FALSE
;
/*
 * Assemble a scalar math opcode as two instructions.
 *
 * First the given opcode is issued with D.dst.math set, writing the x
 * channel of a helper temporary obtained from gethelpr().  Then a MOV with
 * an all-x swizzle copies that value into the real destination set up by
 * assemble_dst().  Each instruction is issued with next_ins().
 */
2286 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
)
2292 tmp
= gethelpr(pAsm
);
2294 // opcode tmp.x, a.x
2297 pAsm
->D
.dst
.opcode
= opcode
;
2298 pAsm
->D
.dst
.math
= 1;
2300 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2301 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2302 pAsm
->D
.dst
.reg
= tmp
;
2303 pAsm
->D
.dst
.writex
= 1;
2305 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2310 if ( GL_FALSE
== next_ins(pAsm
) )
2315 // Now replicate result to all necessary channels in destination
2316 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2318 if( GL_FALSE
== assemble_dst(pAsm
) )
/* NOTE(review): a source rtype is assigned DST_REG_TEMPORARY here, while
   the other helpers in this file use SRC_REG_TEMPORARY for the same
   purpose - confirm the two enumerators have the same value, or switch to
   SRC_REG_TEMPORARY. */
2323 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2324 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
2325 pAsm
->S
[0].src
.reg
= tmp
;
2327 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
2328 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2330 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Assemble ABS as a MAX of the source with itself negated: the second
 * source is a bit copy of the first with its negate flags flipped.
 */
2338 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
)
2342 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
2344 if( GL_FALSE
== assemble_dst(pAsm
) )
2348 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
/* S[1] = -S[0] */
2353 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
2354 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
2356 if ( GL_FALSE
== next_ins(pAsm
) )
2364 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
)
2366 if( GL_FALSE
== checkop2(pAsm
) )
2371 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
2373 if( GL_FALSE
== assemble_dst(pAsm
) )
2378 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2383 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2388 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_SUB
)
2390 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
2393 if( GL_FALSE
== next_ins(pAsm
) )
2401 GLboolean
assemble_BAD(char *opcode_str
)
2403 radeon_error("Not yet implemented instruction (%s)\n", opcode_str
);
/*
 * Assemble CMP with the three-operand opcode CNDGE.
 *
 * When the instruction's write mask is not 0xF the CNDGE result is written
 * to a helper temporary with all channels enabled, and a following MOV
 * copies it into the real destination; otherwise the destination from
 * assemble_dst() is used directly.
 *
 * NOTE(review): the second and third assemble_src() calls use the argument
 * pairs (2, 1) and (1, 2) - presumably swapping the last two sources to
 * match CNDGE's operand order; confirm against assemble_src().
 */
2407 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
)
2411 if( GL_FALSE
== checkop3(pAsm
) )
2416 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_CNDGE
;
2417 pAsm
->D
.dst
.op3
= 1;
2421 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
2423 //OP3 has no support for write mask
2424 tmp
= gethelpr(pAsm
);
2426 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2427 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2428 pAsm
->D
.dst
.reg
= tmp
;
2430 nomask_PVSDST(&(pAsm
->D
.dst
));
2434 if( GL_FALSE
== assemble_dst(pAsm
) )
2440 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2445 if( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
2450 if( GL_FALSE
== assemble_src(pAsm
, 1, 2) )
2455 if ( GL_FALSE
== next_ins(pAsm
) )
/* Masked write: copy the helper temporary into the real destination. */
2460 if (0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
2462 if( GL_FALSE
== assemble_dst(pAsm
) )
2467 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2470 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2471 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2472 pAsm
->S
[0].src
.reg
= tmp
;
2474 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2475 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2477 if( GL_FALSE
== next_ins(pAsm
) )
2486 GLboolean
assemble_COS(r700_AssemblerBase
*pAsm
)
2488 return assemble_math_function(pAsm
, SQ_OP2_INST_COS
);
2491 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
)
2493 if( GL_FALSE
== checkop2(pAsm
) )
2498 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_DOT4
;
2500 if( GL_FALSE
== assemble_dst(pAsm
) )
2505 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2510 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2515 if(OPCODE_DP3
== pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
2517 zerocomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2518 zerocomp_PVSSRC(&(pAsm
->S
[1].src
), 3);
2520 else if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_DPH
)
2522 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2525 if ( GL_FALSE
== next_ins(pAsm
) )
2533 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
)
2535 if( GL_FALSE
== checkop2(pAsm
) )
2540 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
2542 if( GL_FALSE
== assemble_dst(pAsm
) )
2547 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2552 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2557 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 0);
2558 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2560 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 0);
2561 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 2);
2563 if ( GL_FALSE
== next_ins(pAsm
) )
2571 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
)
2573 return assemble_math_function(pAsm
, SQ_OP2_INST_EXP_IEEE
);
/*
 * Assemble FLR as a single FLOOR instruction: destination from
 * assemble_dst(), one source from assemble_src(), issued with next_ins().
 */
2576 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
)
2580 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
2582 if ( GL_FALSE
== assemble_dst(pAsm
) )
2587 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2592 if ( GL_FALSE
== next_ins(pAsm
) )
2600 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
)
2602 return assemble_math_function(pAsm
, SQ_OP2_INST_FLT_TO_INT
);
/*
 * Assemble FRC as a single FRACT instruction: destination from
 * assemble_dst(), one source from assemble_src(), issued with next_ins().
 */
2605 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
)
2609 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
2611 if ( GL_FALSE
== assemble_dst(pAsm
) )
2616 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2621 if ( GL_FALSE
== next_ins(pAsm
) )
/*
 * Assemble KIL with the KILLGT opcode.
 *
 * The destination is temporary register 0 with every write flag cleared.
 * S[0] is set up by hand as temporary register 0 with an all-SQ_SEL_0
 * swizzle, i.e. the constant zero, and a further source is filled in by
 * assemble_src(pAsm, 0, 1).  After the instruction is issued the shader is
 * flagged with killIsUsed.
 */
2629 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
)
2631 /* TODO: doc says KILL has to be last(end) ALU clause */
2635 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_KILLGT
;
/* No channel of the destination is written. */
2637 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2638 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2639 pAsm
->D
.dst
.reg
= 0;
2640 pAsm
->D
.dst
.writex
= 0;
2641 pAsm
->D
.dst
.writey
= 0;
2642 pAsm
->D
.dst
.writez
= 0;
2643 pAsm
->D
.dst
.writew
= 0;
/* First source: constant zero via the SQ_SEL_0 swizzle. */
2645 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2646 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2647 pAsm
->S
[0].src
.reg
= 0;
2649 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_0
);
2650 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2652 if ( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
2657 if ( GL_FALSE
== next_ins(pAsm
) )
2662 pAsm
->pR700Shader
->killIsUsed
= GL_TRUE
;
2667 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
)
2669 return assemble_math_function(pAsm
, SQ_OP2_INST_LOG_IEEE
);
/*
 * Assemble LRP as three instructions that share one helper temporary:
 *
 *   1. ADD    tmp = (source from assemble_src(1, 0)) + negated (source
 *                   from assemble_src(2, 1))
 *   2. MULADD tmp, with S[0] = tmp and two further sources from
 *                   assemble_src(0, 1) and assemble_src(2, -1)
 *   3. MOV    destination from assemble_dst() = tmp
 *
 * Each instruction is issued with next_ins().
 */
2672 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
)
2676 if( GL_FALSE
== checkop3(pAsm
) )
2681 tmp
= gethelpr(pAsm
);
/* Step 1: ADD into the helper temporary, all channels enabled. */
2683 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
2685 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2686 pAsm
->D
.dst
.reg
= tmp
;
2687 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2688 nomask_PVSDST(&(pAsm
->D
.dst
));
2691 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
2696 if ( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
2701 neg_PVSSRC(&(pAsm
->S
[1].src
));
2703 if( GL_FALSE
== next_ins(pAsm
) )
/* Step 2: MULADD reading and writing the helper temporary. */
2708 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
2709 pAsm
->D
.dst
.op3
= 1;
2711 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2712 pAsm
->D
.dst
.reg
= tmp
;
2713 nomask_PVSDST(&(pAsm
->D
.dst
));
2714 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2716 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2717 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2718 pAsm
->S
[0].src
.reg
= tmp
;
2719 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2722 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
2726 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
2731 if( GL_FALSE
== next_ins(pAsm
) )
/* Step 3: MOV the helper temporary into the real destination. */
2736 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2738 if( GL_FALSE
== assemble_dst(pAsm
) )
2743 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2744 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2745 pAsm
->S
[0].src
.reg
= tmp
;
2746 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2748 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Assemble MAD with the three-operand opcode MULADD.
 *
 * The result is redirected to a helper temporary (bReplaceDst) when the
 * destination is a PROGRAM_TEMPORARY whose index equals that of a
 * PROGRAM_TEMPORARY source, or when the write mask is not 0xF.  In that
 * case a following MOV copies the helper temporary into the real
 * destination.
 */
2756 GLboolean
assemble_MAD(struct r700_AssemblerBase
*pAsm
)
2759 GLboolean bReplaceDst
= GL_FALSE
;
2760 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
2762 if( GL_FALSE
== checkop3(pAsm
) )
2767 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
2768 pAsm
->D
.dst
.op3
= 1;
/* Destination aliases one of the three sources? */
2772 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
2773 { /* TODO : more investigation on MAD src and dst using same register */
2774 for(ii
=0; ii
<3; ii
++)
2776 if( (PROGRAM_TEMPORARY
== pILInst
->SrcReg
[ii
].File
)
2777 && (pILInst
->DstReg
.Index
== pILInst
->SrcReg
[ii
].Index
) )
2779 bReplaceDst
= GL_TRUE
;
2784 if(0xF != pILInst
->DstReg
.WriteMask
)
2785 { /* OP3 has no support for write mask */
2786 bReplaceDst
= GL_TRUE
;
2789 if(GL_TRUE
== bReplaceDst
)
2791 tmp
= gethelpr(pAsm
);
2793 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2794 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2795 pAsm
->D
.dst
.reg
= tmp
;
2797 nomask_PVSDST(&(pAsm
->D
.dst
));
2801 if( GL_FALSE
== assemble_dst(pAsm
) )
2807 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2812 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2817 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
2822 if ( GL_FALSE
== next_ins(pAsm
) )
/* Redirected result: copy the helper temporary into the real destination. */
2827 if (GL_TRUE
== bReplaceDst
)
2829 if( GL_FALSE
== assemble_dst(pAsm
) )
2834 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2837 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2838 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2839 pAsm
->S
[0].src
.reg
= tmp
;
2841 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2842 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2844 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * Assemble LIT as a sequence of five instructions, each issued with
 * next_ins():
 *
 *   1. MOV          dst.xw = 1.0
 *   2. MAX          dst.y  = max(src.x, 0.0)
 *   3. LOG_CLAMPED  dst.z  = log(src.y)
 *   4. MUL_LIT      tmp.x  = MUL_LIT(src.w, dst.z, src.x)
 *   5. EXP_IEEE     dst.z  = exp(tmp.x)
 *
 * The destination and source register numbers and types are captured once
 * after assemble_dst() / assemble_src() and re-applied before each step,
 * because next_ins() clears the source descriptors.  tmp is a helper
 * temporary from gethelpr().
 */
2854 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
)
2856 unsigned int dstReg
;
2857 unsigned int dstType
;
2858 unsigned int srcReg
;
2859 unsigned int srcType
;
2861 int tmp
= gethelpr(pAsm
);
2863 if( GL_FALSE
== assemble_dst(pAsm
) )
2867 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
/* Remember the registers involved for the later steps. */
2871 dstReg
= pAsm
->D
.dst
.reg
;
2872 dstType
= pAsm
->D
.dst
.rtype
;
2873 srcReg
= pAsm
->S
[0].src
.reg
;
2874 srcType
= pAsm
->S
[0].src
.rtype
;
2876 /* dst.xw, <- 1.0 */
2877 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2878 pAsm
->D
.dst
.rtype
= dstType
;
2879 pAsm
->D
.dst
.reg
= dstReg
;
2880 pAsm
->D
.dst
.writex
= 1;
2881 pAsm
->D
.dst
.writey
= 0;
2882 pAsm
->D
.dst
.writez
= 0;
2883 pAsm
->D
.dst
.writew
= 1;
2884 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2885 pAsm
->S
[0].src
.reg
= tmp
;
2886 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2887 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2888 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_1
;
2889 pAsm
->S
[0].src
.swizzley
= SQ_SEL_1
;
2890 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_1
;
2891 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_1
;
2892 if( GL_FALSE
== next_ins(pAsm
) )
2897 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2902 /* dst.y = max(src.x, 0.0) */
2903 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
2904 pAsm
->D
.dst
.rtype
= dstType
;
2905 pAsm
->D
.dst
.reg
= dstReg
;
2906 pAsm
->D
.dst
.writex
= 0;
2907 pAsm
->D
.dst
.writey
= 1;
2908 pAsm
->D
.dst
.writez
= 0;
2909 pAsm
->D
.dst
.writew
= 0;
2910 pAsm
->S
[0].src
.rtype
= srcType
;
2911 pAsm
->S
[0].src
.reg
= srcReg
;
2912 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2913 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
/* Second operand of the MAX: constant zero via the SQ_SEL_0 swizzle. */
2914 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
2915 pAsm
->S
[1].src
.reg
= tmp
;
2916 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
2917 noneg_PVSSRC(&(pAsm
->S
[1].src
));
2918 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
2919 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
2920 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
2921 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
2922 if( GL_FALSE
== next_ins(pAsm
) )
2927 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2932 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
);
2934 /* dst.z = log(src.y) */
2935 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_CLAMPED
;
2936 pAsm
->D
.dst
.math
= 1;
2937 pAsm
->D
.dst
.rtype
= dstType
;
2938 pAsm
->D
.dst
.reg
= dstReg
;
2939 pAsm
->D
.dst
.writex
= 0;
2940 pAsm
->D
.dst
.writey
= 0;
2941 pAsm
->D
.dst
.writez
= 1;
2942 pAsm
->D
.dst
.writew
= 0;
2943 pAsm
->S
[0].src
.rtype
= srcType
;
2944 pAsm
->S
[0].src
.reg
= srcReg
;
2945 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2946 if( GL_FALSE
== next_ins(pAsm
) )
2951 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2956 if( GL_FALSE
== assemble_src(pAsm
, 0, 2) )
2961 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
2963 swizzleagain_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
2965 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
2966 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MUL_LIT
;
2967 pAsm
->D
.dst
.math
= 1;
2968 pAsm
->D
.dst
.op3
= 1;
2969 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2970 pAsm
->D
.dst
.reg
= tmp
;
2971 pAsm
->D
.dst
.writex
= 1;
2972 pAsm
->D
.dst
.writey
= 0;
2973 pAsm
->D
.dst
.writez
= 0;
2974 pAsm
->D
.dst
.writew
= 0;
2976 pAsm
->S
[0].src
.rtype
= srcType
;
2977 pAsm
->S
[0].src
.reg
= srcReg
;
2978 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
/* NOTE(review): the destination register is read back here as
   SRC_REG_TEMPORARY regardless of dstType - confirm this is valid when
   the LIT destination is not a temporary. */
2980 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
2981 pAsm
->S
[1].src
.reg
= dstReg
;
2982 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
2983 noneg_PVSSRC(&(pAsm
->S
[1].src
));
2984 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_Z
;
2985 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Z
;
2986 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
2987 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_Z
;
2989 pAsm
->S
[2].src
.rtype
= srcType
;
2990 pAsm
->S
[2].src
.reg
= srcReg
;
2991 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
2993 if( GL_FALSE
== next_ins(pAsm
) )
2998 /* dst.z = exp(tmp.x) */
2999 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3000 pAsm
->D
.dst
.math
= 1;
3001 pAsm
->D
.dst
.rtype
= dstType
;
3002 pAsm
->D
.dst
.reg
= dstReg
;
3003 pAsm
->D
.dst
.writex
= 0;
3004 pAsm
->D
.dst
.writey
= 0;
3005 pAsm
->D
.dst
.writez
= 1;
3006 pAsm
->D
.dst
.writew
= 0;
3008 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3009 pAsm
->S
[0].src
.reg
= tmp
;
3010 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3011 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3012 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
3013 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
3014 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_X
;
3015 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_X
;
3017 if( GL_FALSE
== next_ins(pAsm
) )
3025 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
)
3027 if( GL_FALSE
== checkop2(pAsm
) )
3032 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3034 if( GL_FALSE
== assemble_dst(pAsm
) )
3039 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3044 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3049 if( GL_FALSE
== next_ins(pAsm
) )
3057 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
)
3059 if( GL_FALSE
== checkop2(pAsm
) )
3064 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MIN
;
3066 if( GL_FALSE
== assemble_dst(pAsm
) )
3071 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3076 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3081 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_MOV: emit a MOV ALU instruction for the current IL instruction.
 *
 * Selects SQ_OP2_INST_MOV as the destination opcode, then runs
 * assemble_dst(), assemble_src() for source 0 and next_ins(); the result of
 * each is compared against GL_FALSE.
 *
 * NOTE(review): the embedded line numbering of this listing has gaps
 * (3090-3092 and after every condition), so the braces, the return
 * statements and possibly an operand-check call are not visible here --
 * confirm against the complete file before changing this function.
 */
3089 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
)
3093 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
/* Destination first, then the single source operand. */
3095 if (GL_FALSE
== assemble_dst(pAsm
))
3100 if (GL_FALSE
== assemble_src(pAsm
, 0, -1))
/* Commit the assembled instruction. */
3105 if ( GL_FALSE
== next_ins(pAsm
) )
3113 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
)
3115 if( GL_FALSE
== checkop2(pAsm
) )
3120 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3122 if( GL_FALSE
== assemble_dst(pAsm
) )
3127 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3132 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3137 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_POW: emit the ALU sequence for the IL POW opcode.
 *
 * Four instructions are assembled, each committed with next_ins():
 *   1. SQ_OP2_INST_LOG_IEEE  helper <- IL source 0
 *   2. SQ_OP2_INST_MUL       helper <- helper.x * IL source 1
 *   3. SQ_OP2_INST_EXP_IEEE  helper <- helper.x
 *   4. SQ_OP2_INST_MOV       IL destination (via assemble_dst) <- helper.x
 *
 * NOTE(review): the embedded line numbering of this listing has gaps
 * (e.g. 3146-3150), so the declaration of tmp, the braces, the return
 * statements and possibly an operand-check call are not visible here --
 * confirm against the complete file before changing this function.
 */
3145 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
)
/* Helper register used as scratch by all four instructions. */
3151 tmp
= gethelpr(pAsm
);
3153 // LG2 tmp.x, a.swizzle
3154 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
3155 pAsm
->D
.dst
.math
= 1;
3157 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3158 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3159 pAsm
->D
.dst
.reg
= tmp
;
/* nomask_PVSDST() enables all four write channels of the helper. */
3160 nomask_PVSDST(&(pAsm
->D
.dst
));
3162 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3167 if( GL_FALSE
== next_ins(pAsm
) )
3172 // MUL tmp.x, tmp.x, b.swizzle
3173 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3175 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3176 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3177 pAsm
->D
.dst
.reg
= tmp
;
3178 nomask_PVSDST(&(pAsm
->D
.dst
));
/* First operand: the helper, selected through SQ_SEL_X, without negation. */
3180 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3181 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3182 pAsm
->S
[0].src
.reg
= tmp
;
3183 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3184 noneg_PVSSRC(&(pAsm
->S
[0].src
));
/* Second operand: IL source 1. */
3186 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3191 if( GL_FALSE
== next_ins(pAsm
) )
3196 // EX2 dst.mask, tmp.x
/* The code below writes the helper (all channels), not the IL destination;
 * the IL destination is only written by the final MOV. */
3198 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3199 pAsm
->D
.dst
.math
= 1;
3201 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3202 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3203 pAsm
->D
.dst
.reg
= tmp
;
3204 nomask_PVSDST(&(pAsm
->D
.dst
));
3206 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3207 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3208 pAsm
->S
[0].src
.reg
= tmp
;
3209 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3210 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3212 if( GL_FALSE
== next_ins(pAsm
) )
3217 // Now replicate result to all necessary channels in destination
3218 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3220 if( GL_FALSE
== assemble_dst(pAsm
) )
3225 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
/* NOTE(review): a source operand is tagged with a DST_REG_* constant here,
 * whereas the MUL and EX2 steps above use SRC_REG_TEMPORARY for the same
 * register -- presumably the two constants share a value; confirm and
 * consider switching to SRC_REG_TEMPORARY. */
3226 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3227 pAsm
->S
[0].src
.reg
= tmp
;
3229 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3230 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3232 if( GL_FALSE
== next_ins(pAsm
) )
3240 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
)
3242 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIP_IEEE
);
3245 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
)
3247 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIPSQRT_IEEE
);
3250 GLboolean
assemble_SIN(r700_AssemblerBase
*pAsm
)
3252 return assemble_math_function(pAsm
, SQ_OP2_INST_SIN
);
/*
 * assemble_SCS: emit the ALU sequence for the IL SCS opcode.
 *
 * Three instructions are assembled, each committed with next_ins():
 *   1. SQ_OP2_INST_COS  helper, writex set  <- IL source 0
 *   2. SQ_OP2_INST_SIN  helper, writey set  <- IL source 0
 *   3. SQ_OP2_INST_MOV  IL destination (via assemble_dst) <- helper, with
 *      the z and w selectors forced to SQ_SEL_0
 *
 * NOTE(review): the embedded line numbering of this listing has gaps
 * (e.g. 3256-3260), so the declaration of tmp, the braces, the return
 * statements and possibly an operand-check call are not visible here --
 * confirm against the complete file before changing this function.
 */
3255 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
)
/* Helper register that collects the cosine and the sine. */
3261 tmp
= gethelpr(pAsm
);
/* COS -> helper; only writex is set explicitly here.
 * NOTE(review): the other write flags are not cleared in this function --
 * presumably they are reset elsewhere between instructions; confirm. */
3264 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_COS
;
3265 pAsm
->D
.dst
.math
= 1;
3267 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3268 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3269 pAsm
->D
.dst
.reg
= tmp
;
3270 pAsm
->D
.dst
.writex
= 1;
3272 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3277 if ( GL_FALSE
== next_ins(pAsm
) )
/* SIN -> helper; only writey is set explicitly here. */
3283 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SIN
;
3284 pAsm
->D
.dst
.math
= 1;
3286 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3287 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3288 pAsm
->D
.dst
.reg
= tmp
;
3289 pAsm
->D
.dst
.writey
= 1;
3291 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3296 if( GL_FALSE
== next_ins(pAsm
) )
3301 // MOV dst.mask, tmp
3302 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3304 if( GL_FALSE
== assemble_dst(pAsm
) )
3309 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
/* NOTE(review): a source operand is tagged with a DST_REG_* constant here;
 * other handlers in this file use SRC_REG_TEMPORARY for helper sources --
 * presumably the two constants share a value; confirm. */
3310 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3311 pAsm
->S
[0].src
.reg
= tmp
;
/* The z and w selectors are overridden with SQ_SEL_0 after noswizzle_PVSSRC(). */
3313 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3314 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_0
;
3315 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_0
;
3317 if ( GL_FALSE
== next_ins(pAsm
) )
3325 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
)
3327 if( GL_FALSE
== checkop2(pAsm
) )
3332 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGE
;
3334 if( GL_FALSE
== assemble_dst(pAsm
) )
3339 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3344 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3349 if( GL_FALSE
== next_ins(pAsm
) )
3357 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
)
3359 if( GL_FALSE
== checkop2(pAsm
) )
3364 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGT
;
3366 if( GL_FALSE
== assemble_dst(pAsm
) )
3371 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3376 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
3381 if( GL_FALSE
== next_ins(pAsm
) )
3389 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
)
3394 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
)
3396 GLboolean src_const
;
3397 GLboolean need_barrier
= GL_FALSE
;
3401 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].SrcReg
[0].File
)
3403 case PROGRAM_CONSTANT
:
3404 case PROGRAM_LOCAL_PARAM
:
3405 case PROGRAM_ENV_PARAM
:
3406 case PROGRAM_STATE_VAR
:
3407 src_const
= GL_TRUE
;
3409 case PROGRAM_TEMPORARY
:
3412 src_const
= GL_FALSE
;
3416 if (GL_TRUE
== src_const
)
3418 if ( GL_FALSE
== mov_temp(pAsm
, 0) )
3420 need_barrier
= GL_TRUE
;
3423 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
3428 radeon_error("do not support TXB yet\n");
3434 radeon_error("Internal error: bad texture op (not TEX)\n");
3439 if (pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
3441 GLuint tmp
= gethelpr(pAsm
);
3442 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
3443 pAsm
->D
.dst
.math
= 1;
3444 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3445 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3446 pAsm
->D
.dst
.reg
= tmp
;
3447 pAsm
->D
.dst
.writew
= 1;
3449 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3453 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
3454 if( GL_FALSE
== next_ins(pAsm
) )
3459 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3460 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3461 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3462 pAsm
->D
.dst
.reg
= tmp
;
3463 pAsm
->D
.dst
.writex
= 1;
3464 pAsm
->D
.dst
.writey
= 1;
3465 pAsm
->D
.dst
.writez
= 1;
3466 pAsm
->D
.dst
.writew
= 0;
3468 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3472 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3473 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3474 pAsm
->S
[1].src
.reg
= tmp
;
3475 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_W
);
3477 if( GL_FALSE
== next_ins(pAsm
) )
3482 pAsm
->aArgSubst
[1] = tmp
;
3483 need_barrier
= GL_TRUE
;
3486 if (pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
3488 GLuint tmp1
= gethelpr(pAsm
);
3489 GLuint tmp2
= gethelpr(pAsm
);
3491 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3492 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_CUBE
;
3493 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3494 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3495 pAsm
->D
.dst
.reg
= tmp1
;
3496 nomask_PVSDST(&(pAsm
->D
.dst
));
3498 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3503 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3508 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
);
3509 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_X
, SQ_SEL_Z
, SQ_SEL_Z
);
3511 if( GL_FALSE
== next_ins(pAsm
) )
3516 /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently
3517 * have to do explicit instruction
3519 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3520 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3521 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3522 pAsm
->D
.dst
.reg
= tmp1
;
3523 pAsm
->D
.dst
.writez
= 1;
3525 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3526 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3527 pAsm
->S
[0].src
.reg
= tmp1
;
3528 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3529 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
3530 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3534 /* tmp1.z = RCP_e(|tmp1.z|) */
3535 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
3536 pAsm
->D
.dst
.math
= 1;
3537 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3538 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3539 pAsm
->D
.dst
.reg
= tmp1
;
3540 pAsm
->D
.dst
.writez
= 1;
3542 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3543 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3544 pAsm
->S
[0].src
.reg
= tmp1
;
3545 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Z
;
3549 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
3550 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
3551 * muladd has no writemask, have to use another temp
3552 * also no support for imm constants, so add 1 here
3554 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3555 pAsm
->D
.dst
.op3
= 1;
3556 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3557 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3558 pAsm
->D
.dst
.reg
= tmp2
;
3560 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3561 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3562 pAsm
->S
[0].src
.reg
= tmp1
;
3563 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3564 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3565 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3566 pAsm
->S
[1].src
.reg
= tmp1
;
3567 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
);
3568 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
3569 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
3570 pAsm
->S
[2].src
.reg
= tmp1
;
3571 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_1
);
3575 /* ADD the remaining .5 */
3576 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3577 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3578 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3579 pAsm
->D
.dst
.reg
= tmp2
;
3580 pAsm
->D
.dst
.writex
= 1;
3581 pAsm
->D
.dst
.writey
= 1;
3582 pAsm
->D
.dst
.writez
= 0;
3583 pAsm
->D
.dst
.writew
= 0;
3585 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3586 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3587 pAsm
->S
[0].src
.reg
= tmp2
;
3588 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3589 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3590 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3591 pAsm
->S
[1].src
.reg
= 252; // SQ_ALU_SRC_0_5
3592 noswizzle_PVSSRC(&(pAsm
->S
[1].src
));
3596 /* tmp1.xy = tmp2.xy */
3597 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3598 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3599 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3600 pAsm
->D
.dst
.reg
= tmp1
;
3601 pAsm
->D
.dst
.writex
= 1;
3602 pAsm
->D
.dst
.writey
= 1;
3603 pAsm
->D
.dst
.writez
= 0;
3604 pAsm
->D
.dst
.writew
= 0;
3606 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3607 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3608 pAsm
->S
[0].src
.reg
= tmp2
;
3609 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3612 pAsm
->aArgSubst
[1] = tmp1
;
3613 need_barrier
= GL_TRUE
;
3617 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE
;
3618 pAsm
->is_tex
= GL_TRUE
;
3619 if ( GL_TRUE
== need_barrier
)
3621 pAsm
->need_tex_barrier
= GL_TRUE
;
3623 // Set src1 to tex unit id
3624 pAsm
->S
[1].src
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcUnit
;
3625 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3627 //No sw info from mesa compiler, so hard code here.
3628 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
3629 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
3630 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
3631 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
3633 if( GL_FALSE
== tex_dst(pAsm
) )
3638 if( GL_FALSE
== tex_src(pAsm
) )
3643 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
3645 /* hopefully did swizzles before */
3646 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3649 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
3651 /* SAMPLE dst, tmp.yxwy, CUBE */
3652 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Y
;
3653 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
3654 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_W
;
3655 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_Y
;
3658 if ( GL_FALSE
== next_ins(pAsm
) )
3666 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
)
3670 if( GL_FALSE
== checkop2(pAsm
) )
3675 tmp
= gethelpr(pAsm
);
3677 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3679 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3680 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3681 pAsm
->D
.dst
.reg
= tmp
;
3682 nomask_PVSDST(&(pAsm
->D
.dst
));
3684 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3689 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3694 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
3695 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
3697 if( GL_FALSE
== next_ins(pAsm
) )
3702 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3703 pAsm
->D
.dst
.op3
= 1;
3705 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
3707 tmp
= gethelpr(pAsm
);
3709 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3710 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3711 pAsm
->D
.dst
.reg
= tmp
;
3713 nomask_PVSDST(&(pAsm
->D
.dst
));
3717 if( GL_FALSE
== assemble_dst(pAsm
) )
3723 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3728 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3733 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
3734 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
3736 // result1 + (neg) result0
3737 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
),ADDR_ABSOLUTE
);
3738 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
3739 pAsm
->S
[2].src
.reg
= tmp
;
3741 neg_PVSSRC(&(pAsm
->S
[2].src
));
3742 noswizzle_PVSSRC(&(pAsm
->S
[2].src
));
3744 if( GL_FALSE
== next_ins(pAsm
) )
3750 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
3752 if( GL_FALSE
== assemble_dst(pAsm
) )
3757 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3759 // Use tmp as source
3760 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3761 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3762 pAsm
->S
[0].src
.reg
= tmp
;
3764 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3765 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3767 if( GL_FALSE
== next_ins(pAsm
) )
3776 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
)
3781 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
)
3786 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
)
3791 GLboolean
AssembleInstr(GLuint uiNumberInsts
,
3792 struct prog_instruction
*pILInst
,
3793 r700_AssemblerBase
*pR700AsmCode
)
3797 pR700AsmCode
->pILInst
= pILInst
;
3798 for(i
=0; i
<uiNumberInsts
; i
++)
3800 pR700AsmCode
->uiCurInst
= i
;
3802 switch (pILInst
[i
].Opcode
)
3805 if ( GL_FALSE
== assemble_ABS(pR700AsmCode
) )
3810 if ( GL_FALSE
== assemble_ADD(pR700AsmCode
) )
3815 radeon_error("Not yet implemented instruction OPCODE_ARL \n");
3816 //if ( GL_FALSE == assemble_BAD("ARL") )
3820 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
3821 //if ( GL_FALSE == assemble_BAD("ARR") )
3826 if ( GL_FALSE
== assemble_CMP(pR700AsmCode
) )
3830 if ( GL_FALSE
== assemble_COS(pR700AsmCode
) )
3837 if ( GL_FALSE
== assemble_DOT(pR700AsmCode
) )
3842 if ( GL_FALSE
== assemble_DST(pR700AsmCode
) )
3847 if ( GL_FALSE
== assemble_EX2(pR700AsmCode
) )
3851 radeon_error("Not yet implemented instruction OPCODE_EXP \n");
3852 //if ( GL_FALSE == assemble_BAD("EXP") )
3854 break; // approx of EX2
3857 if ( GL_FALSE
== assemble_FLR(pR700AsmCode
) )
3861 // if ( GL_FALSE == assemble_FLR_INT() )
3866 if ( GL_FALSE
== assemble_FRC(pR700AsmCode
) )
3871 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
) )
3875 if ( GL_FALSE
== assemble_LG2(pR700AsmCode
) )
3879 if ( GL_FALSE
== assemble_LIT(pR700AsmCode
) )
3883 if ( GL_FALSE
== assemble_LRP(pR700AsmCode
) )
3887 radeon_error("Not yet implemented instruction OPCODE_LOG \n");
3888 //if ( GL_FALSE == assemble_BAD("LOG") )
3890 break; // approx of LG2
3893 if ( GL_FALSE
== assemble_MAD(pR700AsmCode
) )
3897 if ( GL_FALSE
== assemble_MAX(pR700AsmCode
) )
3901 if ( GL_FALSE
== assemble_MIN(pR700AsmCode
) )
3906 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
3910 if ( GL_FALSE
== assemble_MUL(pR700AsmCode
) )
3915 if ( GL_FALSE
== assemble_POW(pR700AsmCode
) )
3919 if ( GL_FALSE
== assemble_RCP(pR700AsmCode
) )
3923 if ( GL_FALSE
== assemble_RSQ(pR700AsmCode
) )
3927 if ( GL_FALSE
== assemble_SIN(pR700AsmCode
) )
3931 if ( GL_FALSE
== assemble_SCS(pR700AsmCode
) )
3936 if ( GL_FALSE
== assemble_SGE(pR700AsmCode
) )
3940 if ( GL_FALSE
== assemble_SLT(pR700AsmCode
) )
3945 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
3950 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
3956 if( (i
+1)<uiNumberInsts
)
3958 if(OPCODE_END
!= pILInst
[i
+1].Opcode
)
3960 if( GL_TRUE
== IsTex(pILInst
[i
+1].Opcode
) )
3962 pR700AsmCode
->pInstDeps
[i
+1].nDstDep
= i
+1; //=1?
3972 if ( GL_FALSE
== assemble_TEX(pR700AsmCode
) )
3977 if ( GL_FALSE
== assemble_XPD(pR700AsmCode
) )
3982 if ( GL_FALSE
== assemble_IF(pR700AsmCode
) )
3986 radeon_error("Not yet implemented instruction OPCODE_ELSE \n");
3987 //if ( GL_FALSE == assemble_BAD("ELSE") )
3991 if ( GL_FALSE
== assemble_ENDIF(pR700AsmCode
) )
3995 //case OPCODE_EXPORT:
3996 // if ( GL_FALSE == assemble_EXPORT() )
4001 //pR700AsmCode->uiCurInst = i;
4002 //This is to remind that if a later export has a depth/stencil
4003 //export, we need a mov to re-arrange the DST channel, where using a
4004 //pseudo inst, we will use this end inst to do it.
4008 radeon_error("internal: unknown instruction\n");
4016 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
4018 GLuint export_starting_index
,
4019 GLuint export_count
,
4020 GLuint starting_register_number
,
4021 GLboolean is_depth_export
)
4023 unsigned char ucWriteMask
;
4025 check_current_clause(pAsm
, CF_EMPTY_CLAUSE
);
4026 check_current_clause(pAsm
, CF_EXPORT_CLAUSE
); //alloc the cf_current_export_clause_ptr
4028 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.type
= type
;
4032 case SQ_EXPORT_PIXEL
:
4033 if(GL_TRUE
== is_depth_export
)
4035 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_Z
;
4039 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_MRT0
+ export_starting_index
;
4044 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_POS_0
+ export_starting_index
;
4047 case SQ_EXPORT_PARAM
:
4048 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= 0x0 + export_starting_index
;
4052 radeon_error("Unknown export type: %d\n", type
);
4057 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_gpr
= starting_register_number
;
4059 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_rel
= SQ_ABSOLUTE
;
4060 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.index_gpr
= 0x0;
4061 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.elem_size
= 0x3;
4063 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.burst_count
= (export_count
- 1);
4064 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4065 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4066 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT
; // _DONE
4067 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4068 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4070 if (export_count
== 1)
4072 ucWriteMask
= pAsm
->pucOutMask
[starting_register_number
- pAsm
->starting_export_register_number
];
4073 /* exports Z as a float into Red channel */
4074 if (GL_TRUE
== is_depth_export
)
4077 if( (ucWriteMask
& 0x1) != 0)
4079 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
4083 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_MASK
;
4085 if( ((ucWriteMask
>>1) & 0x1) != 0)
4087 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
4091 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_MASK
;
4093 if( ((ucWriteMask
>>2) & 0x1) != 0)
4095 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
4099 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_MASK
;
4101 if( ((ucWriteMask
>>3) & 0x1) != 0)
4103 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
4107 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_MASK
;
4112 // This should only be used if all components for all registers have been written
4113 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
4114 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
4115 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
4116 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
4119 pAsm
->cf_last_export_ptr
= pAsm
->cf_current_export_clause_ptr
;
4124 GLboolean
Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase
*pAsm
, BITS depth_channel_select
)
4126 gl_inst_opcode Opcode_save
= pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
; //Should be OPCODE_END
4127 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= OPCODE_MOV
;
4129 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
4131 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4133 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4134 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4135 pAsm
->D
.dst
.reg
= pAsm
->depth_export_register_number
;
4137 pAsm
->D
.dst
.writex
= 1; // depth goes in R channel for HW
4139 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4140 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4141 pAsm
->S
[0].src
.reg
= pAsm
->depth_export_register_number
;
4143 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), depth_channel_select
);
4145 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4147 if( GL_FALSE
== next_ins(pAsm
) )
4152 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= Opcode_save
;
4157 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
,
4158 GLbitfield OutputsWritten
)
4162 if(pR700AsmCode
->depth_export_register_number
>= 0)
4164 if( GL_FALSE
== Move_Depth_Exports_To_Correct_Channels(pR700AsmCode
, SQ_SEL_Z
) ) // depth
4170 unBit
= 1 << FRAG_RESULT_COLOR
;
4171 if(OutputsWritten
& unBit
)
4173 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4177 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_COLOR
],
4183 unBit
= 1 << FRAG_RESULT_DEPTH
;
4184 if(OutputsWritten
& unBit
)
4186 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4190 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_DEPTH
],
4197 if(pR700AsmCode
->cf_last_export_ptr
!= NULL
)
4199 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
4200 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
4206 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
,
4207 GLbitfield OutputsWritten
)
4212 GLuint export_starting_index
= 0;
4213 GLuint export_count
= pR700AsmCode
->number_of_exports
;
4215 unBit
= 1 << VERT_RESULT_HPOS
;
4216 if(OutputsWritten
& unBit
)
4218 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4220 export_starting_index
,
4222 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_HPOS
],
4230 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
4233 pR700AsmCode
->number_of_exports
= export_count
;
4235 unBit
= 1 << VERT_RESULT_COL0
;
4236 if(OutputsWritten
& unBit
)
4238 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4240 export_starting_index
,
4242 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL0
],
4248 export_starting_index
++;
4251 unBit
= 1 << VERT_RESULT_COL1
;
4252 if(OutputsWritten
& unBit
)
4254 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4256 export_starting_index
,
4258 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL1
],
4264 export_starting_index
++;
4267 unBit
= 1 << VERT_RESULT_FOGC
;
4268 if(OutputsWritten
& unBit
)
4270 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4272 export_starting_index
,
4274 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_FOGC
],
4280 export_starting_index
++;
4285 unBit
= 1 << (VERT_RESULT_TEX0
+ i
);
4286 if(OutputsWritten
& unBit
)
4288 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4290 export_starting_index
,
4292 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_TEX0
+ i
],
4298 export_starting_index
++;
4302 // At least one param should be exported
4305 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
4309 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4313 pR700AsmCode
->starting_export_register_number
,
4319 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_0
;
4320 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_0
;
4321 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_0
;
4322 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_1
;
4323 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
4326 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
4331 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
)
4333 FREE(pR700AsmCode
->pucOutMask
);
4334 FREE(pR700AsmCode
->pInstDeps
);