/*
 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
 */
33 #include "main/mtypes.h"
34 #include "main/imports.h"
36 #include "radeon_debug.h"
37 #include "r600_context.h"
39 #include "r700_assembler.h"
41 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
)
43 return pPVSDST
->addrmode0
| ((BITS
)pPVSDST
->addrmode1
<< 1);
46 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
)
48 pPVSDST
->addrmode0
= addrmode
& 1;
49 pPVSDST
->addrmode1
= (addrmode
>> 1) & 1;
52 void nomask_PVSDST(PVSDST
* pPVSDST
)
54 pPVSDST
->writex
= pPVSDST
->writey
= pPVSDST
->writez
= pPVSDST
->writew
= 1;
57 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
)
59 return pPVSSRC
->addrmode0
| ((BITS
)pPVSSRC
->addrmode1
<< 1);
62 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
)
64 pPVSSRC
->addrmode0
= addrmode
& 1;
65 pPVSSRC
->addrmode1
= (addrmode
>> 1) & 1;
69 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
)
74 pPVSSRC
->swizzlew
= swz
;
77 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
)
79 pPVSSRC
->swizzlex
= SQ_SEL_X
;
80 pPVSSRC
->swizzley
= SQ_SEL_Y
;
81 pPVSSRC
->swizzlez
= SQ_SEL_Z
;
82 pPVSSRC
->swizzlew
= SQ_SEL_W
;
86 swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
)
90 case SQ_SEL_X
: x
= pPVSSRC
->swizzlex
;
92 case SQ_SEL_Y
: x
= pPVSSRC
->swizzley
;
94 case SQ_SEL_Z
: x
= pPVSSRC
->swizzlez
;
96 case SQ_SEL_W
: x
= pPVSSRC
->swizzlew
;
103 case SQ_SEL_X
: y
= pPVSSRC
->swizzlex
;
105 case SQ_SEL_Y
: y
= pPVSSRC
->swizzley
;
107 case SQ_SEL_Z
: y
= pPVSSRC
->swizzlez
;
109 case SQ_SEL_W
: y
= pPVSSRC
->swizzlew
;
116 case SQ_SEL_X
: z
= pPVSSRC
->swizzlex
;
118 case SQ_SEL_Y
: z
= pPVSSRC
->swizzley
;
120 case SQ_SEL_Z
: z
= pPVSSRC
->swizzlez
;
122 case SQ_SEL_W
: z
= pPVSSRC
->swizzlew
;
129 case SQ_SEL_X
: w
= pPVSSRC
->swizzlex
;
131 case SQ_SEL_Y
: w
= pPVSSRC
->swizzley
;
133 case SQ_SEL_Z
: w
= pPVSSRC
->swizzlez
;
135 case SQ_SEL_W
: w
= pPVSSRC
->swizzlew
;
140 pPVSSRC
->swizzlex
= x
;
141 pPVSSRC
->swizzley
= y
;
142 pPVSSRC
->swizzlez
= z
;
143 pPVSSRC
->swizzlew
= w
;
146 void neg_PVSSRC(PVSSRC
* pPVSSRC
)
154 void noneg_PVSSRC(PVSSRC
* pPVSSRC
)
162 // negate argument (for SUB instead of ADD and alike)
163 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
)
165 pPVSSRC
->negx
= !pPVSSRC
->negx
;
166 pPVSSRC
->negy
= !pPVSSRC
->negy
;
167 pPVSSRC
->negz
= !pPVSSRC
->negz
;
168 pPVSSRC
->negw
= !pPVSSRC
->negw
;
171 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
175 case 0: pPVSSRC
->swizzlex
= SQ_SEL_0
; pPVSSRC
->negx
= 0; break;
176 case 1: pPVSSRC
->swizzley
= SQ_SEL_0
; pPVSSRC
->negy
= 0; break;
177 case 2: pPVSSRC
->swizzlez
= SQ_SEL_0
; pPVSSRC
->negz
= 0; break;
178 case 3: pPVSSRC
->swizzlew
= SQ_SEL_0
; pPVSSRC
->negw
= 0; break;
183 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
187 case 0: pPVSSRC
->swizzlex
= SQ_SEL_1
; pPVSSRC
->negx
= 0; break;
188 case 1: pPVSSRC
->swizzley
= SQ_SEL_1
; pPVSSRC
->negy
= 0; break;
189 case 2: pPVSSRC
->swizzlez
= SQ_SEL_1
; pPVSSRC
->negz
= 0; break;
190 case 3: pPVSSRC
->swizzlew
= SQ_SEL_1
; pPVSSRC
->negw
= 0; break;
195 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
)
197 return (pOutVTXFmt0
->point_size
|
198 pOutVTXFmt0
->edge_flag
|
199 pOutVTXFmt0
->rta_index
|
200 pOutVTXFmt0
->kill_flag
|
201 pOutVTXFmt0
->viewport_index
);
204 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
)
206 return (pFPOutFmt
->depth
|
207 pFPOutFmt
->stencil_ref
|
209 pFPOutFmt
->coverage_to_mask
);
212 GLboolean
is_reduction_opcode(PVSDWORD
* dest
)
214 if (dest
->dst
.op3
== 0)
216 if ( (dest
->dst
.opcode
== SQ_OP2_INST_DOT4
|| dest
->dst
.opcode
== SQ_OP2_INST_DOT4_IEEE
|| dest
->dst
.opcode
== SQ_OP2_INST_CUBE
) )
224 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
)
226 GLuint format
= FMT_INVALID
;
227 GLuint uiElemSize
= 0;
232 case GL_UNSIGNED_BYTE
:
237 format
= FMT_8
; break;
239 format
= FMT_8_8
; break;
241 format
= FMT_8_8_8
; break;
243 format
= FMT_8_8_8_8
; break;
249 case GL_UNSIGNED_SHORT
:
255 format
= FMT_16
; break;
257 format
= FMT_16_16
; break;
259 format
= FMT_16_16_16
; break;
261 format
= FMT_16_16_16_16
; break;
267 case GL_UNSIGNED_INT
:
273 format
= FMT_32
; break;
275 format
= FMT_32_32
; break;
277 format
= FMT_32_32_32
; break;
279 format
= FMT_32_32_32_32
; break;
290 format
= FMT_32_FLOAT
; break;
292 format
= FMT_32_32_FLOAT
; break;
294 format
= FMT_32_32_32_FLOAT
; break;
296 format
= FMT_32_32_32_32_FLOAT
; break;
306 format
= FMT_32_FLOAT
; break;
308 format
= FMT_32_32_FLOAT
; break;
310 format
= FMT_32_32_32_FLOAT
; break;
312 format
= FMT_32_32_32_32_FLOAT
; break;
319 //GL_ASSERT_NO_CASE();
322 if(NULL
!= pClient_size
)
324 *pClient_size
= uiElemSize
* nChannels
;
330 unsigned int r700GetNumOperands(r700_AssemblerBase
* pAsm
)
337 switch (pAsm
->D
.dst
.opcode
)
339 case SQ_OP2_INST_ADD
:
340 case SQ_OP2_INST_KILLGT
:
341 case SQ_OP2_INST_MUL
:
342 case SQ_OP2_INST_MAX
:
343 case SQ_OP2_INST_MIN
:
344 //case SQ_OP2_INST_MAX_DX10:
345 //case SQ_OP2_INST_MIN_DX10:
346 case SQ_OP2_INST_SETGT
:
347 case SQ_OP2_INST_SETGE
:
348 case SQ_OP2_INST_PRED_SETE
:
349 case SQ_OP2_INST_PRED_SETGT
:
350 case SQ_OP2_INST_PRED_SETGE
:
351 case SQ_OP2_INST_PRED_SETNE
:
352 case SQ_OP2_INST_DOT4
:
353 case SQ_OP2_INST_DOT4_IEEE
:
354 case SQ_OP2_INST_CUBE
:
357 case SQ_OP2_INST_MOV
:
358 case SQ_OP2_INST_MOVA_FLOOR
:
359 case SQ_OP2_INST_FRACT
:
360 case SQ_OP2_INST_FLOOR
:
361 case SQ_OP2_INST_EXP_IEEE
:
362 case SQ_OP2_INST_LOG_CLAMPED
:
363 case SQ_OP2_INST_LOG_IEEE
:
364 case SQ_OP2_INST_RECIP_IEEE
:
365 case SQ_OP2_INST_RECIPSQRT_IEEE
:
366 case SQ_OP2_INST_FLT_TO_INT
:
367 case SQ_OP2_INST_SIN
:
368 case SQ_OP2_INST_COS
:
371 default: radeon_error(
372 "Need instruction operand number for %x.\n", pAsm
->D
.dst
.opcode
);
378 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
)
382 Init_R700_Shader(pShader
);
383 pAsm
->pR700Shader
= pShader
;
384 pAsm
->currentShaderType
= spt
;
386 pAsm
->cf_last_export_ptr
= NULL
;
388 pAsm
->cf_current_export_clause_ptr
= NULL
;
389 pAsm
->cf_current_alu_clause_ptr
= NULL
;
390 pAsm
->cf_current_tex_clause_ptr
= NULL
;
391 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
392 pAsm
->cf_current_cf_clause_ptr
= NULL
;
394 // No clause has been created yet
395 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
397 pAsm
->number_of_colorandz_exports
= 0;
398 pAsm
->number_of_exports
= 0;
399 pAsm
->number_of_export_opcodes
= 0;
407 pAsm
->uLastPosUpdate
= 0;
409 *(BITS
*) &pAsm
->fp_stOutFmt0
= 0;
413 pAsm
->number_used_registers
= 0;
414 pAsm
->uUsedConsts
= 256;
418 pAsm
->uBoolConsts
= 0;
419 pAsm
->uIntConsts
= 0;
424 pAsm
->fc_stack
[0].type
= FC_NONE
;
426 pAsm
->branch_depth
= 0;
427 pAsm
->max_branch_depth
= 0;
432 pAsm
->aArgSubst
[3] = (-1);
436 for (i
=0; i
<NUMBER_OF_OUTPUT_COLORS
; i
++)
438 pAsm
->color_export_register_number
[i
] = (-1);
442 pAsm
->depth_export_register_number
= (-1);
443 pAsm
->stencil_export_register_number
= (-1);
444 pAsm
->coverage_to_mask_export_register_number
= (-1);
445 pAsm
->mask_export_register_number
= (-1);
447 pAsm
->starting_export_register_number
= 0;
448 pAsm
->starting_vfetch_register_number
= 0;
449 pAsm
->starting_temp_register_number
= 0;
450 pAsm
->uFirstHelpReg
= 0;
453 pAsm
->input_position_is_used
= GL_FALSE
;
454 pAsm
->input_normal_is_used
= GL_FALSE
;
457 for (i
=0; i
<NUMBER_OF_INPUT_COLORS
; i
++)
459 pAsm
->input_color_is_used
[ i
] = GL_FALSE
;
462 for (i
=0; i
<NUMBER_OF_TEXTURE_UNITS
; i
++)
464 pAsm
->input_texture_unit_is_used
[ i
] = GL_FALSE
;
467 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
469 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
472 pAsm
->number_of_inputs
= 0;
474 pAsm
->is_tex
= GL_FALSE
;
475 pAsm
->need_tex_barrier
= GL_FALSE
;
480 GLboolean
IsTex(gl_inst_opcode Opcode
)
482 if( (OPCODE_TEX
==Opcode
) || (OPCODE_TXP
==Opcode
) || (OPCODE_TXB
==Opcode
) )
489 GLboolean
IsAlu(gl_inst_opcode Opcode
)
491 //TODO : more for fc and ex for higher spec.
499 int check_current_clause(r700_AssemblerBase
* pAsm
,
500 CF_CLAUSE_TYPE new_clause_type
)
502 if (pAsm
->cf_current_clause_type
!= new_clause_type
)
503 { //Close last open clause
504 switch (pAsm
->cf_current_clause_type
)
507 if ( pAsm
->cf_current_alu_clause_ptr
!= NULL
)
509 pAsm
->cf_current_alu_clause_ptr
= NULL
;
513 if ( pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
515 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
519 if ( pAsm
->cf_current_tex_clause_ptr
!= NULL
)
521 pAsm
->cf_current_tex_clause_ptr
= NULL
;
524 case CF_EXPORT_CLAUSE
:
525 if ( pAsm
->cf_current_export_clause_ptr
!= NULL
)
527 pAsm
->cf_current_export_clause_ptr
= NULL
;
530 case CF_OTHER_CLAUSE
:
531 if ( pAsm
->cf_current_cf_clause_ptr
!= NULL
)
533 pAsm
->cf_current_cf_clause_ptr
= NULL
;
536 case CF_EMPTY_CLAUSE
:
540 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
544 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
547 switch (new_clause_type
)
550 pAsm
->cf_current_clause_type
= CF_ALU_CLAUSE
;
553 pAsm
->cf_current_clause_type
= CF_VTX_CLAUSE
;
556 pAsm
->cf_current_clause_type
= CF_TEX_CLAUSE
;
558 case CF_EXPORT_CLAUSE
:
560 R700ControlFlowSXClause
* pR700ControlFlowSXClause
561 = (R700ControlFlowSXClause
*) CALLOC_STRUCT(R700ControlFlowSXClause
);
563 // Add new export instruction to control flow program
564 if (pR700ControlFlowSXClause
!= 0)
566 pAsm
->cf_current_export_clause_ptr
= pR700ControlFlowSXClause
;
567 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause
);
568 AddCFInstruction( pAsm
->pR700Shader
,
569 (R700ControlFlowInstruction
*)pR700ControlFlowSXClause
);
574 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
577 pAsm
->cf_current_clause_type
= CF_EXPORT_CLAUSE
;
580 case CF_EMPTY_CLAUSE
:
582 case CF_OTHER_CLAUSE
:
583 pAsm
->cf_current_clause_type
= CF_OTHER_CLAUSE
;
587 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
595 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
596 R700VertexInstruction
* vertex_instruction_ptr
)
598 if( GL_FALSE
== check_current_clause(pAsm
, CF_VTX_CLAUSE
) )
603 if( pAsm
->cf_current_vtx_clause_ptr
== NULL
||
604 ( (pAsm
->cf_current_vtx_clause_ptr
!= NULL
) &&
605 (pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_vtx_clause_ptr
->m_ShaderInstType
)-1)
608 // Create new Vfetch control flow instruction for this new clause
609 pAsm
->cf_current_vtx_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
611 if (pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
613 Init_R700ControlFlowGenericClause(pAsm
->cf_current_vtx_clause_ptr
);
614 AddCFInstruction( pAsm
->pR700Shader
,
615 (R700ControlFlowInstruction
*)pAsm
->cf_current_vtx_clause_ptr
);
619 radeon_error("Could not allocate a new VFetch CF instruction.\n");
623 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
624 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
625 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
626 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
= 0x0;
627 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
628 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
629 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_VTX
;
630 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
631 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
633 LinkVertexInstruction(pAsm
->cf_current_vtx_clause_ptr
, vertex_instruction_ptr
);
637 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
++;
640 AddVTXInstruction(pAsm
->pR700Shader
, vertex_instruction_ptr
);
645 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
646 R700TextureInstruction
* tex_instruction_ptr
)
648 if ( GL_FALSE
== check_current_clause(pAsm
, CF_TEX_CLAUSE
) )
653 if ( pAsm
->cf_current_tex_clause_ptr
== NULL
||
654 ( (pAsm
->cf_current_tex_clause_ptr
!= NULL
) &&
655 (pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_tex_clause_ptr
->m_ShaderInstType
)-1)
658 // new tex cf instruction for this new clause
659 pAsm
->cf_current_tex_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
661 if (pAsm
->cf_current_tex_clause_ptr
!= NULL
)
663 Init_R700ControlFlowGenericClause(pAsm
->cf_current_tex_clause_ptr
);
664 AddCFInstruction( pAsm
->pR700Shader
,
665 (R700ControlFlowInstruction
*)pAsm
->cf_current_tex_clause_ptr
);
669 radeon_error("Could not allocate a new TEX CF instruction.\n");
673 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
674 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
675 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
677 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
678 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
679 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_TEX
;
680 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
681 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x0; //0x1;
685 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
++;
688 // If this clause constains any TEX instruction that is dependent on a previous instruction,
689 // set the barrier bit
690 if( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) || pAsm
->need_tex_barrier
== GL_TRUE
)
692 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
695 if(NULL
== pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
)
697 pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
= tex_instruction_ptr
;
698 tex_instruction_ptr
->m_pLinkedGenericClause
= pAsm
->cf_current_tex_clause_ptr
;
701 AddTEXInstruction(pAsm
->pR700Shader
, tex_instruction_ptr
);
706 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
708 GLuint destination_register
,
709 GLuint number_of_elements
,
710 GLenum dataElementType
,
711 VTX_FETCH_METHOD
* pFetchMethod
)
713 GLuint client_size_inbyte
;
715 GLuint mega_fetch_count
;
716 GLuint is_mega_fetch_flag
;
718 R700VertexGenericFetch
* vfetch_instruction_ptr
;
719 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
= pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
];
721 if (assembled_vfetch_instruction_ptr
== NULL
)
723 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
724 if (vfetch_instruction_ptr
== NULL
)
728 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
732 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
735 data_format
= GetSurfaceFormat(dataElementType
, number_of_elements
, &client_size_inbyte
);
737 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
743 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
744 is_mega_fetch_flag
= 0x1;
745 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
748 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
749 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
750 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
752 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= gl_client_id
;
753 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
754 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
755 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
756 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
758 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (number_of_elements
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
759 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (number_of_elements
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
760 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (number_of_elements
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
761 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (number_of_elements
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
763 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
765 // Destination register
766 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
767 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
769 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
770 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
772 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
774 if (assembled_vfetch_instruction_ptr
== NULL
)
776 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
781 if (pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] != NULL
)
787 pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] = vfetch_instruction_ptr
;
794 GLuint
gethelpr(r700_AssemblerBase
* pAsm
)
796 GLuint r
= pAsm
->uHelpReg
;
798 if (pAsm
->uHelpReg
> pAsm
->number_used_registers
)
800 pAsm
->number_used_registers
= pAsm
->uHelpReg
;
804 void resethelpr(r700_AssemblerBase
* pAsm
)
806 pAsm
->uHelpReg
= pAsm
->uFirstHelpReg
;
809 void checkop_init(r700_AssemblerBase
* pAsm
)
815 pAsm
->aArgSubst
[3] = -1;
818 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
)
820 GLuint tmp
= gethelpr(pAsm
);
822 //mov src to temp helper gpr.
823 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
825 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
827 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
828 pAsm
->D
.dst
.reg
= tmp
;
830 nomask_PVSDST(&(pAsm
->D
.dst
));
832 if( GL_FALSE
== assemble_src(pAsm
, src
, 0) )
837 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
838 noneg_PVSSRC(&(pAsm
->S
[0].src
));
840 if( GL_FALSE
== next_ins(pAsm
) )
845 pAsm
->aArgSubst
[1 + src
] = tmp
;
850 GLboolean
checkop1(r700_AssemblerBase
* pAsm
)
856 GLboolean
checkop2(r700_AssemblerBase
* pAsm
)
858 GLboolean bSrcConst
[2];
859 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
863 if( (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
864 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
865 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
866 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
868 bSrcConst
[0] = GL_TRUE
;
872 bSrcConst
[0] = GL_FALSE
;
874 if( (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
875 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
876 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
877 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
879 bSrcConst
[1] = GL_TRUE
;
883 bSrcConst
[1] = GL_FALSE
;
886 if( (bSrcConst
[0] == GL_TRUE
) && (bSrcConst
[1] == GL_TRUE
) )
888 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
890 if( GL_FALSE
== mov_temp(pAsm
, 1) )
900 GLboolean
checkop3(r700_AssemblerBase
* pAsm
)
902 GLboolean bSrcConst
[3];
903 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
907 if( (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
908 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
909 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
910 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
912 bSrcConst
[0] = GL_TRUE
;
916 bSrcConst
[0] = GL_FALSE
;
918 if( (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
919 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
920 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
921 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
923 bSrcConst
[1] = GL_TRUE
;
927 bSrcConst
[1] = GL_FALSE
;
929 if( (pILInst
->SrcReg
[2].File
== PROGRAM_CONSTANT
) ||
930 (pILInst
->SrcReg
[2].File
== PROGRAM_LOCAL_PARAM
) ||
931 (pILInst
->SrcReg
[2].File
== PROGRAM_ENV_PARAM
) ||
932 (pILInst
->SrcReg
[2].File
== PROGRAM_STATE_VAR
) )
934 bSrcConst
[2] = GL_TRUE
;
938 bSrcConst
[2] = GL_FALSE
;
941 if( (GL_TRUE
== bSrcConst
[0]) &&
942 (GL_TRUE
== bSrcConst
[1]) &&
943 (GL_TRUE
== bSrcConst
[2]) )
945 if( GL_FALSE
== mov_temp(pAsm
, 1) )
949 if( GL_FALSE
== mov_temp(pAsm
, 2) )
956 else if( (GL_TRUE
== bSrcConst
[0]) &&
957 (GL_TRUE
== bSrcConst
[1]) )
959 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
961 if( GL_FALSE
== mov_temp(pAsm
, 1) )
969 else if ( (GL_TRUE
== bSrcConst
[0]) &&
970 (GL_TRUE
== bSrcConst
[2]) )
972 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[2].Index
)
974 if( GL_FALSE
== mov_temp(pAsm
, 2) )
982 else if( (GL_TRUE
== bSrcConst
[1]) &&
983 (GL_TRUE
== bSrcConst
[2]) )
985 if(pILInst
->SrcReg
[1].Index
!= pILInst
->SrcReg
[2].Index
)
987 if( GL_FALSE
== mov_temp(pAsm
, 2) )
999 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
1003 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1010 if(pAsm
->aArgSubst
[1+src
] >= 0)
1012 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1013 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1014 pAsm
->S
[fld
].src
.reg
= pAsm
->aArgSubst
[1+src
];
1018 switch (pILInst
->SrcReg
[src
].File
)
1020 case PROGRAM_TEMPORARY
:
1021 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1022 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1023 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
+ pAsm
->starting_temp_register_number
;
1025 case PROGRAM_CONSTANT
:
1026 case PROGRAM_LOCAL_PARAM
:
1027 case PROGRAM_ENV_PARAM
:
1028 case PROGRAM_STATE_VAR
:
1029 if (1 == pILInst
->SrcReg
[src
].RelAddr
)
1031 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_RELATIVE_A0
);
1035 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1038 pAsm
->S
[fld
].src
.rtype
= SRC_REG_CONSTANT
;
1039 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
;
1042 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1043 pAsm
->S
[fld
].src
.rtype
= SRC_REG_INPUT
;
1044 switch (pAsm
->currentShaderType
)
1047 pAsm
->S
[fld
].src
.reg
= pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1050 pAsm
->S
[fld
].src
.reg
= pAsm
->ucVP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1055 radeon_error("Invalid source argument type\n");
1060 pAsm
->S
[fld
].src
.swizzlex
= pILInst
->SrcReg
[src
].Swizzle
& 0x7;
1061 pAsm
->S
[fld
].src
.swizzley
= (pILInst
->SrcReg
[src
].Swizzle
>> 3) & 0x7;
1062 pAsm
->S
[fld
].src
.swizzlez
= (pILInst
->SrcReg
[src
].Swizzle
>> 6) & 0x7;
1063 pAsm
->S
[fld
].src
.swizzlew
= (pILInst
->SrcReg
[src
].Swizzle
>> 9) & 0x7;
1065 pAsm
->S
[fld
].src
.negx
= pILInst
->SrcReg
[src
].Negate
& 0x1;
1066 pAsm
->S
[fld
].src
.negy
= (pILInst
->SrcReg
[src
].Negate
>> 1) & 0x1;
1067 pAsm
->S
[fld
].src
.negz
= (pILInst
->SrcReg
[src
].Negate
>> 2) & 0x1;
1068 pAsm
->S
[fld
].src
.negw
= (pILInst
->SrcReg
[src
].Negate
>> 3) & 0x1;
1073 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
)
1075 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1076 switch (pILInst
->DstReg
.File
)
1078 case PROGRAM_TEMPORARY
:
1079 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1080 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1081 pAsm
->D
.dst
.reg
= pILInst
->DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1083 case PROGRAM_ADDRESS
:
1084 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1085 pAsm
->D
.dst
.rtype
= DST_REG_A0
;
1086 pAsm
->D
.dst
.reg
= 0;
1088 case PROGRAM_OUTPUT
:
1089 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1090 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1091 switch (pAsm
->currentShaderType
)
1094 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1097 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1102 radeon_error("Invalid destination output argument type\n");
1106 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1107 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1108 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1109 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1114 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
)
1116 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1118 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
1120 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1121 pAsm
->D
.dst
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1123 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1125 else if(PROGRAM_OUTPUT
== pILInst
->DstReg
.File
)
1127 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1128 switch (pAsm
->currentShaderType
)
1131 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1134 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1138 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1142 radeon_error("Invalid destination output argument type\n");
1146 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1147 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1148 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1149 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1154 GLboolean
tex_src(r700_AssemblerBase
*pAsm
)
1156 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1158 GLboolean bValidTexCoord
= GL_FALSE
;
1160 if(pAsm
->aArgSubst
[1] >= 0)
1162 bValidTexCoord
= GL_TRUE
;
1163 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1164 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1165 pAsm
->S
[0].src
.reg
= pAsm
->aArgSubst
[1];
1169 switch (pILInst
->SrcReg
[0].File
) {
1170 case PROGRAM_CONSTANT
:
1171 case PROGRAM_LOCAL_PARAM
:
1172 case PROGRAM_ENV_PARAM
:
1173 case PROGRAM_STATE_VAR
:
1175 case PROGRAM_TEMPORARY
:
1176 bValidTexCoord
= GL_TRUE
;
1177 pAsm
->S
[0].src
.reg
= pILInst
->SrcReg
[0].Index
+
1178 pAsm
->starting_temp_register_number
;
1179 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1182 switch (pILInst
->SrcReg
[0].Index
)
1184 case FRAG_ATTRIB_WPOS
:
1185 case FRAG_ATTRIB_COL0
:
1186 case FRAG_ATTRIB_COL1
:
1187 case FRAG_ATTRIB_FOGC
:
1188 case FRAG_ATTRIB_TEX0
:
1189 case FRAG_ATTRIB_TEX1
:
1190 case FRAG_ATTRIB_TEX2
:
1191 case FRAG_ATTRIB_TEX3
:
1192 case FRAG_ATTRIB_TEX4
:
1193 case FRAG_ATTRIB_TEX5
:
1194 case FRAG_ATTRIB_TEX6
:
1195 case FRAG_ATTRIB_TEX7
:
1196 bValidTexCoord
= GL_TRUE
;
1197 pAsm
->S
[0].src
.reg
=
1198 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1199 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
1201 case FRAG_ATTRIB_FACE
:
1202 fprintf(stderr
, "FRAG_ATTRIB_FACE unsupported\n");
1204 case FRAG_ATTRIB_PNTC
:
1205 fprintf(stderr
, "FRAG_ATTRIB_PNTC unsupported\n");
1207 case FRAG_ATTRIB_VAR0
:
1208 fprintf(stderr
, "FRAG_ATTRIB_VAR0 unsupported\n");
1215 if(GL_TRUE
== bValidTexCoord
)
1217 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1221 radeon_error("Invalid source texcoord for TEX instruction\n");
1225 pAsm
->S
[0].src
.swizzlex
= pILInst
->SrcReg
[0].Swizzle
& 0x7;
1226 pAsm
->S
[0].src
.swizzley
= (pILInst
->SrcReg
[0].Swizzle
>> 3) & 0x7;
1227 pAsm
->S
[0].src
.swizzlez
= (pILInst
->SrcReg
[0].Swizzle
>> 6) & 0x7;
1228 pAsm
->S
[0].src
.swizzlew
= (pILInst
->SrcReg
[0].Swizzle
>> 9) & 0x7;
1230 pAsm
->S
[0].src
.negx
= pILInst
->SrcReg
[0].Negate
& 0x1;
1231 pAsm
->S
[0].src
.negy
= (pILInst
->SrcReg
[0].Negate
>> 1) & 0x1;
1232 pAsm
->S
[0].src
.negz
= (pILInst
->SrcReg
[0].Negate
>> 2) & 0x1;
1233 pAsm
->S
[0].src
.negw
= (pILInst
->SrcReg
[0].Negate
>> 3) & 0x1;
1238 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
)
1240 PVSSRC
* texture_coordinate_source
;
1241 PVSSRC
* texture_unit_source
;
1243 R700TextureInstruction
* tex_instruction_ptr
= (R700TextureInstruction
*) CALLOC_STRUCT(R700TextureInstruction
);
1244 if (tex_instruction_ptr
== NULL
)
1248 Init_R700TextureInstruction(tex_instruction_ptr
);
1250 texture_coordinate_source
= &(pAsm
->S
[0].src
);
1251 texture_unit_source
= &(pAsm
->S
[1].src
);
1253 tex_instruction_ptr
->m_Word0
.f
.tex_inst
= pAsm
->D
.dst
.opcode
;
1254 tex_instruction_ptr
->m_Word0
.f
.bc_frac_mode
= 0x0;
1255 tex_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
1257 tex_instruction_ptr
->m_Word0
.f
.resource_id
= texture_unit_source
->reg
;
1259 tex_instruction_ptr
->m_Word1
.f
.lod_bias
= 0x0;
1261 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_NORMALIZED
;
1262 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_NORMALIZED
;
1263 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_NORMALIZED
;
1264 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_NORMALIZED
;
1266 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1267 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_UNNORMALIZED
;
1268 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_UNNORMALIZED
;
1269 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_UNNORMALIZED
;
1270 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_UNNORMALIZED
;
1273 tex_instruction_ptr
->m_Word2
.f
.offset_x
= 0x0;
1274 tex_instruction_ptr
->m_Word2
.f
.offset_y
= 0x0;
1275 tex_instruction_ptr
->m_Word2
.f
.offset_z
= 0x0;
1277 tex_instruction_ptr
->m_Word2
.f
.sampler_id
= texture_unit_source
->reg
;
1280 if ( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
1281 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
1283 tex_instruction_ptr
->m_Word0
.f
.src_gpr
= texture_coordinate_source
->reg
;
1284 tex_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
1286 tex_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
1287 tex_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
;
1289 tex_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (pAsm
->D
.dst
.writex
? texture_unit_source
->swizzlex
: SQ_SEL_MASK
);
1290 tex_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (pAsm
->D
.dst
.writey
? texture_unit_source
->swizzley
: SQ_SEL_MASK
);
1291 tex_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (pAsm
->D
.dst
.writez
? texture_unit_source
->swizzlez
: SQ_SEL_MASK
);
1292 tex_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (pAsm
->D
.dst
.writew
? texture_unit_source
->swizzlew
: SQ_SEL_MASK
);
1295 tex_instruction_ptr
->m_Word2
.f
.src_sel_x
= texture_coordinate_source
->swizzlex
;
1296 tex_instruction_ptr
->m_Word2
.f
.src_sel_y
= texture_coordinate_source
->swizzley
;
1297 tex_instruction_ptr
->m_Word2
.f
.src_sel_z
= texture_coordinate_source
->swizzlez
;
1298 tex_instruction_ptr
->m_Word2
.f
.src_sel_w
= texture_coordinate_source
->swizzlew
;
1302 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1306 if( GL_FALSE
== add_tex_instruction(pAsm
, tex_instruction_ptr
) )
1314 void initialize(r700_AssemblerBase
*pAsm
)
1316 GLuint cycle
, component
;
1318 for (cycle
=0; cycle
<NUMBER_OF_CYCLES
; cycle
++)
1320 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1322 pAsm
->hw_gpr
[cycle
][component
] = (-1);
1325 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1327 pAsm
->hw_cfile_addr
[component
] = (-1);
1328 pAsm
->hw_cfile_chan
[component
] = (-1);
1332 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
1335 BITS scalar_channel_index
)
1342 //--------------------------------------------------------------------------
1343 // Source for operands src0, src1.
1344 // Values [0,127] correspond to GPR[0..127].
1345 // Values [256,511] correspond to cfile constants c[0..255].
1347 //--------------------------------------------------------------------------
1348 // Other special values are shown in the list below.
1350 // 248 SQ_ALU_SRC_0: special constant 0.0.
1351 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1353 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1354 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1356 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1357 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1359 // 254 SQ_ALU_SRC_PV: previous vector result.
1360 // 255 SQ_ALU_SRC_PS: previous scalar result.
1361 //--------------------------------------------------------------------------
1363 BITS channel_swizzle
;
1364 switch (scalar_channel_index
)
1366 case 0: channel_swizzle
= pSource
->swizzlex
; break;
1367 case 1: channel_swizzle
= pSource
->swizzley
; break;
1368 case 2: channel_swizzle
= pSource
->swizzlez
; break;
1369 case 3: channel_swizzle
= pSource
->swizzlew
; break;
1370 default: channel_swizzle
= SQ_SEL_MASK
; break;
1373 if(channel_swizzle
== SQ_SEL_0
)
1375 src_sel
= SQ_ALU_SRC_0
;
1377 else if (channel_swizzle
== SQ_SEL_1
)
1379 src_sel
= SQ_ALU_SRC_1
;
1383 if ( (pSource
->rtype
== SRC_REG_TEMPORARY
) ||
1384 (pSource
->rtype
== SRC_REG_INPUT
)
1387 src_sel
= pSource
->reg
;
1389 else if (pSource
->rtype
== SRC_REG_CONSTANT
)
1391 src_sel
= pSource
->reg
+ CFILE_REGISTER_OFFSET
;
1395 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1396 source_index
, pSource
->rtype
);
1401 if( ADDR_ABSOLUTE
== addrmode_PVSSRC(pSource
) )
1403 src_rel
= SQ_ABSOLUTE
;
1407 src_rel
= SQ_RELATIVE
;
1410 switch (channel_swizzle
)
1413 src_chan
= SQ_CHAN_X
;
1416 src_chan
= SQ_CHAN_Y
;
1419 src_chan
= SQ_CHAN_Z
;
1422 src_chan
= SQ_CHAN_W
;
1426 // Does not matter since src_sel controls
1427 src_chan
= SQ_CHAN_X
;
1430 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle
);
1435 switch (scalar_channel_index
)
1437 case 0: src_neg
= pSource
->negx
; break;
1438 case 1: src_neg
= pSource
->negy
; break;
1439 case 2: src_neg
= pSource
->negz
; break;
1440 case 3: src_neg
= pSource
->negw
; break;
1441 default: src_neg
= 0; break;
1444 switch (source_index
)
1447 alu_instruction_ptr
->m_Word0
.f
.src0_sel
= src_sel
;
1448 alu_instruction_ptr
->m_Word0
.f
.src0_rel
= src_rel
;
1449 alu_instruction_ptr
->m_Word0
.f
.src0_chan
= src_chan
;
1450 alu_instruction_ptr
->m_Word0
.f
.src0_neg
= src_neg
;
1453 alu_instruction_ptr
->m_Word0
.f
.src1_sel
= src_sel
;
1454 alu_instruction_ptr
->m_Word0
.f
.src1_rel
= src_rel
;
1455 alu_instruction_ptr
->m_Word0
.f
.src1_chan
= src_chan
;
1456 alu_instruction_ptr
->m_Word0
.f
.src1_neg
= src_neg
;
1459 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
= src_sel
;
1460 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
= src_rel
;
1461 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
= src_chan
;
1462 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
= src_neg
;
1465 radeon_error("Only three sources allowed in ALU opcodes.\n");
1473 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
1474 R700ALUInstruction
* alu_instruction_ptr
,
1475 GLuint contiguous_slots_needed
)
1477 if( GL_FALSE
== check_current_clause(pAsm
, CF_ALU_CLAUSE
) )
1482 if ( pAsm
->cf_current_alu_clause_ptr
== NULL
||
1483 ( (pAsm
->cf_current_alu_clause_ptr
!= NULL
) &&
1484 (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-contiguous_slots_needed
-1) )
1488 //new cf inst for this clause
1489 pAsm
->cf_current_alu_clause_ptr
= (R700ControlFlowALUClause
*) CALLOC_STRUCT(R700ControlFlowALUClause
);
1491 // link the new cf to cf segment
1492 if(NULL
!= pAsm
->cf_current_alu_clause_ptr
)
1494 Init_R700ControlFlowALUClause(pAsm
->cf_current_alu_clause_ptr
);
1495 AddCFInstruction( pAsm
->pR700Shader
,
1496 (R700ControlFlowInstruction
*)pAsm
->cf_current_alu_clause_ptr
);
1500 radeon_error("Could not allocate a new ALU CF instruction.\n");
1504 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank0
= 0x0;
1505 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank1
= 0x0;
1506 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_mode0
= SQ_CF_KCACHE_NOP
;
1508 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_mode1
= SQ_CF_KCACHE_NOP
;
1509 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr0
= 0x0;
1510 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr1
= 0x0;
1512 //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1;
1513 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
= 0x0;
1514 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ALU
;
1516 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
1518 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
1522 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
++;
1525 // If this clause contains any instruction that is forward dependent on a TEX instruction,
1526 // set the whole_quad_mode for this clause
1527 if ( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) )
1529 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x1;
1532 if (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-1) )
1534 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
1537 if(NULL
== pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
)
1539 pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
= alu_instruction_ptr
;
1540 alu_instruction_ptr
->m_pLinkedALUClause
= pAsm
->cf_current_alu_clause_ptr
;
1543 AddALUInstruction(pAsm
->pR700Shader
, alu_instruction_ptr
);
1548 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
1555 switch (source_index
)
1558 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src0_sel
;
1559 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src0_rel
;
1560 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src0_chan
;
1561 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src0_neg
;
1565 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src1_sel
;
1566 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src1_rel
;
1567 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src1_chan
;
1568 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src1_neg
;
1572 *psrc_sel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
;
1573 *psrc_rel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
;
1574 *psrc_chan
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
;
1575 *psrc_neg
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
;
1580 int is_cfile(BITS sel
)
1582 if (sel
> 255 && sel
< 512)
1589 int is_const(BITS sel
)
1595 else if(sel
>= SQ_ALU_SRC_0
&& sel
<= SQ_ALU_SRC_LITERAL
)
1602 int is_gpr(BITS sel
)
1604 if (sel
>= 0 && sel
< 128)
1611 const GLuint BANK_SWIZZLE_VEC
[8] = {SQ_ALU_VEC_210
, //000
1612 SQ_ALU_VEC_120
, //001
1613 SQ_ALU_VEC_102
, //010
1615 SQ_ALU_VEC_201
, //011
1616 SQ_ALU_VEC_012
, //100
1617 SQ_ALU_VEC_021
, //101
1619 SQ_ALU_VEC_012
, //110
1620 SQ_ALU_VEC_012
}; //111
1622 const GLuint BANK_SWIZZLE_SCL
[8] = {SQ_ALU_SCL_210
, //000
1623 SQ_ALU_SCL_122
, //001
1624 SQ_ALU_SCL_122
, //010
1626 SQ_ALU_SCL_221
, //011
1627 SQ_ALU_SCL_212
, //100
1628 SQ_ALU_SCL_122
, //101
1630 SQ_ALU_SCL_122
, //110
1631 SQ_ALU_SCL_122
}; //111
1633 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
1637 int res_match
= (-1);
1638 int res_empty
= (-1);
1642 for (res
=3; res
>=0; res
--)
1644 if(pAsm
->hw_cfile_addr
[ res
] < 0)
1648 else if( (pAsm
->hw_cfile_addr
[res
] == (int)sel
)
1650 (pAsm
->hw_cfile_chan
[ res
] == (int) chan
) )
1658 // Read for this scalar component already reserved, nothing to do here.
1661 else if(res_empty
>= 0)
1663 pAsm
->hw_cfile_addr
[ res_empty
] = sel
;
1664 pAsm
->hw_cfile_chan
[ res_empty
] = chan
;
1668 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1674 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
)
1676 if(pAsm
->hw_gpr
[cycle
][chan
] < 0)
1678 pAsm
->hw_gpr
[cycle
][chan
] = sel
;
1680 else if(pAsm
->hw_gpr
[cycle
][chan
] != (int)sel
)
1682 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1689 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
1693 case SQ_ALU_SCL_210
:
1695 int table
[3] = {2, 1, 0};
1696 *pCycle
= table
[sel
];
1700 case SQ_ALU_SCL_122
:
1702 int table
[3] = {1, 2, 2};
1703 *pCycle
= table
[sel
];
1707 case SQ_ALU_SCL_212
:
1709 int table
[3] = {2, 1, 2};
1710 *pCycle
= table
[sel
];
1714 case SQ_ALU_SCL_221
:
1716 int table
[3] = {2, 2, 1};
1717 *pCycle
= table
[sel
];
1722 radeon_error("Bad Scalar bank swizzle value\n");
1729 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
1733 case SQ_ALU_VEC_012
:
1735 int table
[3] = {0, 1, 2};
1736 *pCycle
= table
[sel
];
1739 case SQ_ALU_VEC_021
:
1741 int table
[3] = {0, 2, 1};
1742 *pCycle
= table
[sel
];
1745 case SQ_ALU_VEC_120
:
1747 int table
[3] = {1, 2, 0};
1748 *pCycle
= table
[sel
];
1751 case SQ_ALU_VEC_102
:
1753 int table
[3] = {1, 0, 2};
1754 *pCycle
= table
[sel
];
1757 case SQ_ALU_VEC_201
:
1759 int table
[3] = {2, 0, 1};
1760 *pCycle
= table
[sel
];
1763 case SQ_ALU_VEC_210
:
1765 int table
[3] = {2, 1, 0};
1766 *pCycle
= table
[sel
];
1770 radeon_error("Bad Vec bank swizzle value\n");
1778 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
1779 R700ALUInstruction
* alu_instruction_ptr
)
1782 GLuint bank_swizzle
;
1783 GLuint const_count
= 0;
1792 BITS src_sel
[3] = {0,0,0};
1793 BITS src_chan
[3] = {0,0,0};
1794 BITS src_rel
[3] = {0,0,0};
1795 BITS src_neg
[3] = {0,0,0};
1799 GLuint number_of_operands
= r700GetNumOperands(pAsm
);
1801 for (src
=0; src
<number_of_operands
; src
++)
1803 get_src_properties(alu_instruction_ptr
,
1812 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
1813 (is_const( src_sel
[1] ) ? 2 : 0) +
1814 (is_const( src_sel
[2] ) ? 1 : 0) );
1816 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_SCL
[ swizzle_key
];
1818 for (src
=0; src
<number_of_operands
; src
++)
1820 sel
= src_sel
[src
];
1821 chan
= src_chan
[src
];
1822 rel
= src_rel
[src
];
1823 neg
= src_neg
[src
];
1825 if (is_const( sel
))
1827 // Any constant, including literal and inline constants
1830 if (is_cfile( sel
))
1832 reserve_cfile(pAsm
, sel
, chan
);
1838 for (src
=0; src
<number_of_operands
; src
++)
1840 sel
= src_sel
[src
];
1841 chan
= src_chan
[src
];
1842 rel
= src_rel
[src
];
1843 neg
= src_neg
[src
];
1847 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
1849 if( GL_FALSE
== cycle_for_scalar_bank_swizzle(bank_swizzle
, src
, &cycle
) )
1854 if(cycle
< const_count
)
1856 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
1867 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
1868 R700ALUInstruction
* alu_instruction_ptr
)
1871 GLuint bank_swizzle
;
1872 GLuint const_count
= 0;
1881 BITS src_sel
[3] = {0,0,0};
1882 BITS src_chan
[3] = {0,0,0};
1883 BITS src_rel
[3] = {0,0,0};
1884 BITS src_neg
[3] = {0,0,0};
1888 GLuint number_of_operands
= r700GetNumOperands(pAsm
);
1890 for (src
=0; src
<number_of_operands
; src
++)
1892 get_src_properties(alu_instruction_ptr
,
1901 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
1902 (is_const( src_sel
[1] ) ? 2 : 0) +
1903 (is_const( src_sel
[2] ) ? 1 : 0)
1906 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_VEC
[swizzle_key
];
1908 for (src
=0; src
<number_of_operands
; src
++)
1910 sel
= src_sel
[src
];
1911 chan
= src_chan
[src
];
1912 rel
= src_rel
[src
];
1913 neg
= src_neg
[src
];
1916 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
1920 if( GL_FALSE
== cycle_for_vector_bank_swizzle(bank_swizzle
, src
, &cycle
) )
1926 (sel
== src_sel
[0]) &&
1927 (chan
== src_chan
[0]) )
1932 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
1938 else if( is_const(sel
) )
1944 if( GL_FALSE
== reserve_cfile(pAsm
, sel
, chan
) )
1955 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
)
1957 GLuint number_of_scalar_operations
;
1958 GLboolean is_single_scalar_operation
;
1959 GLuint scalar_channel_index
;
1961 PVSSRC
* pcurrent_source
;
1962 int current_source_index
;
1963 GLuint contiguous_slots_needed
;
1965 GLuint uNumSrc
= r700GetNumOperands(pAsm
);
1966 GLuint channel_swizzle
, j
;
1967 GLuint chan_counter
[4] = {0, 0, 0, 0};
1968 PVSSRC
* pSource
[3];
1969 GLboolean bSplitInst
= GL_FALSE
;
1971 if (1 == pAsm
->D
.dst
.math
)
1973 is_single_scalar_operation
= GL_TRUE
;
1974 number_of_scalar_operations
= 1;
1978 is_single_scalar_operation
= GL_FALSE
;
1979 number_of_scalar_operations
= 4;
1981 /* current assembler doesn't do more than 1 register per source */
1983 /* check read port, only very preliminary algorithm, not count in
1984 src0/1 same comp case and prev slot repeat case; also not count relative
1985 addressing. TODO: improve performance. */
1986 for(j
=0; j
<uNumSrc
; j
++)
1988 pSource
[j
] = &(pAsm
->S
[j
].src
);
1990 for(scalar_channel_index
=0; scalar_channel_index
<4; scalar_channel_index
++)
1992 for(j
=0; j
<uNumSrc
; j
++)
1994 switch (scalar_channel_index
)
1996 case 0: channel_swizzle
= pSource
[j
]->swizzlex
; break;
1997 case 1: channel_swizzle
= pSource
[j
]->swizzley
; break;
1998 case 2: channel_swizzle
= pSource
[j
]->swizzlez
; break;
1999 case 3: channel_swizzle
= pSource
[j
]->swizzlew
; break;
2000 default: channel_swizzle
= SQ_SEL_MASK
; break;
2002 if ( ((pSource
[j
]->rtype
== SRC_REG_TEMPORARY
) ||
2003 (pSource
[j
]->rtype
== SRC_REG_INPUT
))
2004 && (channel_swizzle
<= SQ_SEL_W
) )
2006 chan_counter
[channel_swizzle
]++;
2010 if( (chan_counter
[SQ_SEL_X
] > 3)
2011 || (chan_counter
[SQ_SEL_Y
] > 3)
2012 || (chan_counter
[SQ_SEL_Z
] > 3)
2013 || (chan_counter
[SQ_SEL_W
] > 3) ) /* each chan bank has only 3 ports. */
2015 bSplitInst
= GL_TRUE
;
2020 contiguous_slots_needed
= 0;
2022 if(GL_TRUE
== is_reduction_opcode(&(pAsm
->D
)) )
2024 contiguous_slots_needed
= 4;
2029 for (scalar_channel_index
=0;
2030 scalar_channel_index
< number_of_scalar_operations
;
2031 scalar_channel_index
++)
2033 R700ALUInstruction
* alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2034 if (alu_instruction_ptr
== NULL
)
2038 Init_R700ALUInstruction(alu_instruction_ptr
);
2041 current_source_index
= 0;
2042 pcurrent_source
= &(pAsm
->S
[0].src
);
2044 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2045 current_source_index
,
2047 scalar_channel_index
) )
2055 current_source_index
= 1;
2056 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2058 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2059 current_source_index
,
2061 scalar_channel_index
) )
2068 alu_instruction_ptr
->m_Word0
.f
.index_mode
= SQ_INDEX_AR_X
;
2070 if( (is_single_scalar_operation
== GL_TRUE
)
2071 || (GL_TRUE
== bSplitInst
) )
2073 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
2077 alu_instruction_ptr
->m_Word0
.f
.last
= (scalar_channel_index
== 3) ? 1 : 0;
2080 alu_instruction_ptr
->m_Word0
.f
.pred_sel
= 0x0;
2081 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x0;
2082 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x0;
2085 if( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
2086 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
2088 alu_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
2092 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2096 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
; //D.rtype
2098 if ( is_single_scalar_operation
== GL_TRUE
)
2100 // Override scalar_channel_index since only one scalar value will be written
2101 if(pAsm
->D
.dst
.writex
)
2103 scalar_channel_index
= 0;
2105 else if(pAsm
->D
.dst
.writey
)
2107 scalar_channel_index
= 1;
2109 else if(pAsm
->D
.dst
.writez
)
2111 scalar_channel_index
= 2;
2113 else if(pAsm
->D
.dst
.writew
)
2115 scalar_channel_index
= 3;
2119 alu_instruction_ptr
->m_Word1
.f
.dst_chan
= scalar_channel_index
;
2121 alu_instruction_ptr
->m_Word1
.f
.clamp
= pAsm
->pILInst
[pAsm
->uiCurInst
].SaturateMode
;
2123 if (pAsm
->D
.dst
.op3
)
2127 alu_instruction_ptr
->m_Word1_OP3
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2129 //There's 3rd src for op3
2130 current_source_index
= 2;
2131 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2133 if ( GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2134 current_source_index
,
2136 scalar_channel_index
) )
2146 alu_instruction_ptr
->m_Word1_OP2
.f6
.alu_inst
= pAsm
->D
.dst
.opcode
;
2148 alu_instruction_ptr
->m_Word1_OP2
.f6
.src0_abs
= 0x0;
2149 alu_instruction_ptr
->m_Word1_OP2
.f6
.src1_abs
= 0x0;
2151 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2152 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2153 switch (scalar_channel_index
)
2156 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writex
;
2159 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writey
;
2162 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writez
;
2165 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writew
;
2168 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= 1; //SQ_SEL_MASK;
2171 alu_instruction_ptr
->m_Word1_OP2
.f6
.omod
= SQ_ALU_OMOD_OFF
;
2175 alu_instruction_ptr
->m_Word1_OP2
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2177 alu_instruction_ptr
->m_Word1_OP2
.f
.src0_abs
= 0x0;
2178 alu_instruction_ptr
->m_Word1_OP2
.f
.src1_abs
= 0x0;
2180 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2181 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2182 switch (scalar_channel_index
)
2185 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writex
;
2188 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writey
;
2191 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writez
;
2194 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writew
;
2197 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= 1; //SQ_SEL_MASK;
2200 alu_instruction_ptr
->m_Word1_OP2
.f
.omod
= SQ_ALU_OMOD_OFF
;
2204 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, contiguous_slots_needed
) )
2210 * Judge the type of current instruction, is it vector or scalar
2213 if (is_single_scalar_operation
)
2215 if(GL_FALSE
== check_scalar(pAsm
, alu_instruction_ptr
) )
2222 if(GL_FALSE
== check_vector(pAsm
, alu_instruction_ptr
) )
2228 contiguous_slots_needed
= 0;
2234 GLboolean
next_ins(r700_AssemblerBase
*pAsm
)
2236 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
2238 if( GL_TRUE
== pAsm
->is_tex
)
2240 if (pILInst
->TexSrcTarget
== TEXTURE_RECT_INDEX
) {
2241 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_FALSE
) )
2243 radeon_error("Error assembling TEX instruction\n");
2247 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_TRUE
) )
2249 radeon_error("Error assembling TEX instruction\n");
2256 if( GL_FALSE
== assemble_alu_instruction(pAsm
) )
2258 radeon_error("Error assembling ALU instruction\n");
2263 if(pAsm
->D
.dst
.rtype
== DST_REG_OUT
)
2267 // There is no mask for OP3 instructions, so all channels are written
2268 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
] = 0xF;
2272 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
]
2273 |= (unsigned char)pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
;
2277 //reset for next inst.
2279 pAsm
->S
[0].bits
= 0;
2280 pAsm
->S
[1].bits
= 0;
2281 pAsm
->S
[2].bits
= 0;
2282 pAsm
->is_tex
= GL_FALSE
;
2283 pAsm
->need_tex_barrier
= GL_FALSE
;
2287 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
)
2293 tmp
= gethelpr(pAsm
);
2295 // opcode tmp.x, a.x
2298 pAsm
->D
.dst
.opcode
= opcode
;
2299 pAsm
->D
.dst
.math
= 1;
2301 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2302 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2303 pAsm
->D
.dst
.reg
= tmp
;
2304 pAsm
->D
.dst
.writex
= 1;
2306 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2311 if ( GL_FALSE
== next_ins(pAsm
) )
2316 // Now replicate result to all necessary channels in destination
2317 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2319 if( GL_FALSE
== assemble_dst(pAsm
) )
2324 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2325 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
2326 pAsm
->S
[0].src
.reg
= tmp
;
2328 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
2329 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2331 if( GL_FALSE
== next_ins(pAsm
) )
2339 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
)
2343 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
2345 if( GL_FALSE
== assemble_dst(pAsm
) )
2349 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2354 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
2355 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
2357 if ( GL_FALSE
== next_ins(pAsm
) )
2365 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
)
2367 if( GL_FALSE
== checkop2(pAsm
) )
2372 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
2374 if( GL_FALSE
== assemble_dst(pAsm
) )
2379 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2384 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2389 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_SUB
)
2391 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
2394 if( GL_FALSE
== next_ins(pAsm
) )
2402 GLboolean
assemble_ARL(r700_AssemblerBase
*pAsm
)
2403 { /* TODO: ar values dont' persist between clauses */
2404 if( GL_FALSE
== checkop1(pAsm
) )
2409 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOVA_FLOOR
;
2410 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2411 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2412 pAsm
->D
.dst
.reg
= 0;
2413 pAsm
->D
.dst
.writex
= 0;
2414 pAsm
->D
.dst
.writey
= 0;
2415 pAsm
->D
.dst
.writez
= 0;
2416 pAsm
->D
.dst
.writew
= 0;
2418 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2423 if( GL_FALSE
== next_ins(pAsm
) )
2431 GLboolean
assemble_BAD(char *opcode_str
)
2433 radeon_error("Not yet implemented instruction (%s)\n", opcode_str
);
2437 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
)
2441 if( GL_FALSE
== checkop3(pAsm
) )
2446 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_CNDGE
;
2447 pAsm
->D
.dst
.op3
= 1;
2451 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
2453 //OP3 has no support for write mask
2454 tmp
= gethelpr(pAsm
);
2456 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2457 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2458 pAsm
->D
.dst
.reg
= tmp
;
2460 nomask_PVSDST(&(pAsm
->D
.dst
));
2464 if( GL_FALSE
== assemble_dst(pAsm
) )
2470 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2475 if( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
2480 if( GL_FALSE
== assemble_src(pAsm
, 1, 2) )
2485 if ( GL_FALSE
== next_ins(pAsm
) )
2490 if (0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
2492 if( GL_FALSE
== assemble_dst(pAsm
) )
2497 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2500 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2501 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2502 pAsm
->S
[0].src
.reg
= tmp
;
2504 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2505 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2507 if( GL_FALSE
== next_ins(pAsm
) )
2516 GLboolean
assemble_COS(r700_AssemblerBase
*pAsm
)
2518 return assemble_math_function(pAsm
, SQ_OP2_INST_COS
);
2521 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
)
2523 if( GL_FALSE
== checkop2(pAsm
) )
2528 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_DOT4
;
2530 if( GL_FALSE
== assemble_dst(pAsm
) )
2535 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2540 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2545 if(OPCODE_DP3
== pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
2547 zerocomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2548 zerocomp_PVSSRC(&(pAsm
->S
[1].src
), 3);
2550 else if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_DPH
)
2552 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2555 if ( GL_FALSE
== next_ins(pAsm
) )
2563 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
)
2565 if( GL_FALSE
== checkop2(pAsm
) )
2570 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
2572 if( GL_FALSE
== assemble_dst(pAsm
) )
2577 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2582 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2587 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 0);
2588 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2590 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 0);
2591 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 2);
2593 if ( GL_FALSE
== next_ins(pAsm
) )
2601 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
)
2603 return assemble_math_function(pAsm
, SQ_OP2_INST_EXP_IEEE
);
2606 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
)
2610 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
2612 if ( GL_FALSE
== assemble_dst(pAsm
) )
2617 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2622 if ( GL_FALSE
== next_ins(pAsm
) )
2630 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
)
2632 return assemble_math_function(pAsm
, SQ_OP2_INST_FLT_TO_INT
);
2635 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
)
2639 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
2641 if ( GL_FALSE
== assemble_dst(pAsm
) )
2646 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2651 if ( GL_FALSE
== next_ins(pAsm
) )
2659 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
)
2661 /* TODO: doc says KILL has to be last(end) ALU clause */
2665 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_KILLGT
;
2667 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2668 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2669 pAsm
->D
.dst
.reg
= 0;
2670 pAsm
->D
.dst
.writex
= 0;
2671 pAsm
->D
.dst
.writey
= 0;
2672 pAsm
->D
.dst
.writez
= 0;
2673 pAsm
->D
.dst
.writew
= 0;
2675 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2676 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2677 pAsm
->S
[0].src
.reg
= 0;
2679 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_0
);
2680 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2682 if ( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
2687 if ( GL_FALSE
== next_ins(pAsm
) )
2692 pAsm
->pR700Shader
->killIsUsed
= GL_TRUE
;
2697 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
)
2699 return assemble_math_function(pAsm
, SQ_OP2_INST_LOG_IEEE
);
2702 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
)
2706 if( GL_FALSE
== checkop3(pAsm
) )
2711 tmp
= gethelpr(pAsm
);
2713 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
2715 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2716 pAsm
->D
.dst
.reg
= tmp
;
2717 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2718 nomask_PVSDST(&(pAsm
->D
.dst
));
2721 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
2726 if ( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
2731 neg_PVSSRC(&(pAsm
->S
[1].src
));
2733 if( GL_FALSE
== next_ins(pAsm
) )
2738 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
2739 pAsm
->D
.dst
.op3
= 1;
2741 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2742 pAsm
->D
.dst
.reg
= tmp
;
2743 nomask_PVSDST(&(pAsm
->D
.dst
));
2744 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2746 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2747 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2748 pAsm
->S
[0].src
.reg
= tmp
;
2749 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2752 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
2756 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
2761 if( GL_FALSE
== next_ins(pAsm
) )
2766 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2768 if( GL_FALSE
== assemble_dst(pAsm
) )
2773 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2774 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2775 pAsm
->S
[0].src
.reg
= tmp
;
2776 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2778 if( GL_FALSE
== next_ins(pAsm
) )
2786 GLboolean
assemble_MAD(struct r700_AssemblerBase
*pAsm
)
2789 GLboolean bReplaceDst
= GL_FALSE
;
2790 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
2792 if( GL_FALSE
== checkop3(pAsm
) )
2797 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
2798 pAsm
->D
.dst
.op3
= 1;
2802 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
2803 { /* TODO : more investigation on MAD src and dst using same register */
2804 for(ii
=0; ii
<3; ii
++)
2806 if( (PROGRAM_TEMPORARY
== pILInst
->SrcReg
[ii
].File
)
2807 && (pILInst
->DstReg
.Index
== pILInst
->SrcReg
[ii
].Index
) )
2809 bReplaceDst
= GL_TRUE
;
2814 if(0xF != pILInst
->DstReg
.WriteMask
)
2815 { /* OP3 has no support for write mask */
2816 bReplaceDst
= GL_TRUE
;
2819 if(GL_TRUE
== bReplaceDst
)
2821 tmp
= gethelpr(pAsm
);
2823 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2824 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2825 pAsm
->D
.dst
.reg
= tmp
;
2827 nomask_PVSDST(&(pAsm
->D
.dst
));
2831 if( GL_FALSE
== assemble_dst(pAsm
) )
2837 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2842 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2847 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
2852 if ( GL_FALSE
== next_ins(pAsm
) )
2857 if (GL_TRUE
== bReplaceDst
)
2859 if( GL_FALSE
== assemble_dst(pAsm
) )
2864 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2867 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2868 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2869 pAsm
->S
[0].src
.reg
= tmp
;
2871 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2872 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2874 if( GL_FALSE
== next_ins(pAsm
) )
2884 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
)
2886 unsigned int dstReg
;
2887 unsigned int dstType
;
2888 unsigned int srcReg
;
2889 unsigned int srcType
;
2891 int tmp
= gethelpr(pAsm
);
2893 if( GL_FALSE
== assemble_dst(pAsm
) )
2897 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2901 dstReg
= pAsm
->D
.dst
.reg
;
2902 dstType
= pAsm
->D
.dst
.rtype
;
2903 srcReg
= pAsm
->S
[0].src
.reg
;
2904 srcType
= pAsm
->S
[0].src
.rtype
;
2906 /* dst.xw, <- 1.0 */
2907 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2908 pAsm
->D
.dst
.rtype
= dstType
;
2909 pAsm
->D
.dst
.reg
= dstReg
;
2910 pAsm
->D
.dst
.writex
= 1;
2911 pAsm
->D
.dst
.writey
= 0;
2912 pAsm
->D
.dst
.writez
= 0;
2913 pAsm
->D
.dst
.writew
= 1;
2914 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2915 pAsm
->S
[0].src
.reg
= tmp
;
2916 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2917 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2918 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_1
;
2919 pAsm
->S
[0].src
.swizzley
= SQ_SEL_1
;
2920 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_1
;
2921 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_1
;
2922 if( GL_FALSE
== next_ins(pAsm
) )
2927 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2932 /* dst.y = max(src.x, 0.0) */
2933 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
2934 pAsm
->D
.dst
.rtype
= dstType
;
2935 pAsm
->D
.dst
.reg
= dstReg
;
2936 pAsm
->D
.dst
.writex
= 0;
2937 pAsm
->D
.dst
.writey
= 1;
2938 pAsm
->D
.dst
.writez
= 0;
2939 pAsm
->D
.dst
.writew
= 0;
2940 pAsm
->S
[0].src
.rtype
= srcType
;
2941 pAsm
->S
[0].src
.reg
= srcReg
;
2942 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2943 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
2944 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
2945 pAsm
->S
[1].src
.reg
= tmp
;
2946 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
2947 noneg_PVSSRC(&(pAsm
->S
[1].src
));
2948 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
2949 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
2950 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
2951 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
2952 if( GL_FALSE
== next_ins(pAsm
) )
2957 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2962 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
);
2964 /* dst.z = log(src.y) */
2965 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_CLAMPED
;
2966 pAsm
->D
.dst
.math
= 1;
2967 pAsm
->D
.dst
.rtype
= dstType
;
2968 pAsm
->D
.dst
.reg
= dstReg
;
2969 pAsm
->D
.dst
.writex
= 0;
2970 pAsm
->D
.dst
.writey
= 0;
2971 pAsm
->D
.dst
.writez
= 1;
2972 pAsm
->D
.dst
.writew
= 0;
2973 pAsm
->S
[0].src
.rtype
= srcType
;
2974 pAsm
->S
[0].src
.reg
= srcReg
;
2975 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2976 if( GL_FALSE
== next_ins(pAsm
) )
2981 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2986 if( GL_FALSE
== assemble_src(pAsm
, 0, 2) )
2991 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
2993 swizzleagain_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
2995 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
2996 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MUL_LIT
;
2997 pAsm
->D
.dst
.math
= 1;
2998 pAsm
->D
.dst
.op3
= 1;
2999 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3000 pAsm
->D
.dst
.reg
= tmp
;
3001 pAsm
->D
.dst
.writex
= 1;
3002 pAsm
->D
.dst
.writey
= 0;
3003 pAsm
->D
.dst
.writez
= 0;
3004 pAsm
->D
.dst
.writew
= 0;
3006 pAsm
->S
[0].src
.rtype
= srcType
;
3007 pAsm
->S
[0].src
.reg
= srcReg
;
3008 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3010 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3011 pAsm
->S
[1].src
.reg
= dstReg
;
3012 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3013 noneg_PVSSRC(&(pAsm
->S
[1].src
));
3014 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_Z
;
3015 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Z
;
3016 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
3017 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_Z
;
3019 pAsm
->S
[2].src
.rtype
= srcType
;
3020 pAsm
->S
[2].src
.reg
= srcReg
;
3021 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
3023 if( GL_FALSE
== next_ins(pAsm
) )
3028 /* dst.z = exp(tmp.x) */
3029 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3030 pAsm
->D
.dst
.math
= 1;
3031 pAsm
->D
.dst
.rtype
= dstType
;
3032 pAsm
->D
.dst
.reg
= dstReg
;
3033 pAsm
->D
.dst
.writex
= 0;
3034 pAsm
->D
.dst
.writey
= 0;
3035 pAsm
->D
.dst
.writez
= 1;
3036 pAsm
->D
.dst
.writew
= 0;
3038 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3039 pAsm
->S
[0].src
.reg
= tmp
;
3040 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3041 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3042 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
3043 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
3044 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_X
;
3045 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_X
;
3047 if( GL_FALSE
== next_ins(pAsm
) )
3055 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
)
3057 if( GL_FALSE
== checkop2(pAsm
) )
3062 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3064 if( GL_FALSE
== assemble_dst(pAsm
) )
3069 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3074 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3079 if( GL_FALSE
== next_ins(pAsm
) )
3087 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
)
3089 if( GL_FALSE
== checkop2(pAsm
) )
3094 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MIN
;
3096 if( GL_FALSE
== assemble_dst(pAsm
) )
3101 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3106 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3111 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_MOV: sets the destination opcode to SQ_OP2_INST_MOV, assembles
 * the destination and source 0 of the current IL instruction, then commits
 * the instruction with next_ins().  Each of the three calls is compared
 * against GL_FALSE.
 * NOTE(review): the lines between the signature and the opcode assignment,
 * and the bodies of the failure checks, are not visible in this view.
 */
3119 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
)
3123 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3125 if (GL_FALSE
== assemble_dst(pAsm
))
3130 if (GL_FALSE
== assemble_src(pAsm
, 0, -1))
3135 if ( GL_FALSE
== next_ins(pAsm
) )
3143 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
)
3145 if( GL_FALSE
== checkop2(pAsm
) )
3150 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3152 if( GL_FALSE
== assemble_dst(pAsm
) )
3157 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3162 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3167 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_POW: expands the current IL instruction into four hardware
 * instructions that all go through one helper register obtained from
 * gethelpr():
 *   1. LOG_IEEE of IL source 0, written to tmp (all channels enabled);
 *   2. MUL of tmp.x by IL source 1, written back to tmp;
 *   3. EXP_IEEE of tmp.x, written back to tmp;
 *   4. MOV of tmp.x into the real destination prepared by assemble_dst().
 * Every assemble_src()/assemble_dst()/next_ins() call is compared against
 * GL_FALSE.
 * NOTE(review): the lines between the signature and the gethelpr() call,
 * and the bodies of the failure checks, are not visible in this view.
 */
3175 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
)
3181 tmp
= gethelpr(pAsm
);
3183 // LG2 tmp.x, a.swizzle
3184 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
3185 pAsm
->D
.dst
.math
= 1;
3187 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3188 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3189 pAsm
->D
.dst
.reg
= tmp
;
3190 nomask_PVSDST(&(pAsm
->D
.dst
));
3192 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3197 if( GL_FALSE
== next_ins(pAsm
) )
3202 // MUL tmp.x, tmp.x, b.swizzle
3203 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3205 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3206 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3207 pAsm
->D
.dst
.reg
= tmp
;
3208 nomask_PVSDST(&(pAsm
->D
.dst
));
3210 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3211 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3212 pAsm
->S
[0].src
.reg
= tmp
;
3213 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3214 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3216 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3221 if( GL_FALSE
== next_ins(pAsm
) )
3226 // EX2 dst.mask, tmp.x
3228 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3229 pAsm
->D
.dst
.math
= 1;
3231 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3232 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3233 pAsm
->D
.dst
.reg
= tmp
;
3234 nomask_PVSDST(&(pAsm
->D
.dst
));
3236 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3237 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3238 pAsm
->S
[0].src
.reg
= tmp
;
3239 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3240 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3242 if( GL_FALSE
== next_ins(pAsm
) )
3247 // Now replicate result to all necessary channels in destination
3248 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3250 if( GL_FALSE
== assemble_dst(pAsm
) )
3255 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
/* NOTE(review): a source operand is tagged with DST_REG_TEMPORARY here,
 * while the earlier steps use SRC_REG_TEMPORARY for the same register;
 * confirm both constants have the same value. */
3256 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3257 pAsm
->S
[0].src
.reg
= tmp
;
3259 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3260 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3262 if( GL_FALSE
== next_ins(pAsm
) )
3270 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
)
3272 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIP_IEEE
);
3275 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
)
3277 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIPSQRT_IEEE
);
3280 GLboolean
assemble_SIN(r700_AssemblerBase
*pAsm
)
3282 return assemble_math_function(pAsm
, SQ_OP2_INST_SIN
);
/*
 * assemble_SCS: expands the current IL instruction into three hardware
 * instructions using one helper register obtained from gethelpr():
 *   1. COS of IL source 0 with only the x write-enable set on tmp;
 *   2. SIN of IL source 0 with only the y write-enable set on tmp;
 *   3. MOV of tmp into the real destination prepared by assemble_dst(),
 *      with the z and w source selectors forced to SQ_SEL_0.
 * Every assemble_src()/assemble_dst()/next_ins() call is compared against
 * GL_FALSE.
 * NOTE(review): the lines between the signature and the gethelpr() call,
 * and the bodies of the failure checks, are not visible in this view.
 */
3285 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
)
3291 tmp
= gethelpr(pAsm
);
3294 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_COS
;
3295 pAsm
->D
.dst
.math
= 1;
3297 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3298 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3299 pAsm
->D
.dst
.reg
= tmp
;
3300 pAsm
->D
.dst
.writex
= 1;
3302 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3307 if ( GL_FALSE
== next_ins(pAsm
) )
3313 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SIN
;
3314 pAsm
->D
.dst
.math
= 1;
3316 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3317 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3318 pAsm
->D
.dst
.reg
= tmp
;
3319 pAsm
->D
.dst
.writey
= 1;
3321 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3326 if( GL_FALSE
== next_ins(pAsm
) )
3331 // MOV dst.mask, tmp
3332 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3334 if( GL_FALSE
== assemble_dst(pAsm
) )
3339 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
/* NOTE(review): a source operand is tagged with DST_REG_TEMPORARY here;
 * other functions in this file use SRC_REG_TEMPORARY for sources. Confirm
 * both constants have the same value. */
3340 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3341 pAsm
->S
[0].src
.reg
= tmp
;
3343 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3344 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_0
;
3345 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_0
;
3347 if ( GL_FALSE
== next_ins(pAsm
) )
3355 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
)
3357 if( GL_FALSE
== checkop2(pAsm
) )
3362 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGE
;
3364 if( GL_FALSE
== assemble_dst(pAsm
) )
3369 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3374 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3379 if( GL_FALSE
== next_ins(pAsm
) )
3387 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
)
3389 if( GL_FALSE
== checkop2(pAsm
) )
3394 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGT
;
3396 if( GL_FALSE
== assemble_dst(pAsm
) )
3401 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3406 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
3411 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_STP: takes the assembler state and returns a GLboolean.
 * NOTE(review): the body is not visible in this view.
 */
3419 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
)
/*
 * assemble_TEX: assembles a texture instruction for the current IL
 * instruction.  Visible steps, in order:
 *   - classify SrcReg[0].File: the constant/parameter/state-var files set
 *     src_const to GL_TRUE, and in that case mov_temp(pAsm, 0) is called
 *     and a barrier is requested;
 *   - for OPCODE_TXP, compute RECIP_IEEE of the source w channel into
 *     tmp.w, multiply the source by tmp.w into tmp.xyz, and record tmp in
 *     aArgSubst[1];
 *   - for TEXTURE_CUBE_INDEX, build a CUBE / MAX / RECIP_IEEE / MULADD /
 *     ADD / MOV sequence over two helper registers and record tmp1 in
 *     aArgSubst[1];
 *   - choose SQ_TEX_INST_SAMPLE_L (tested against OPCODE_TXB) or
 *     SQ_TEX_INST_SAMPLE as the opcode, mark the instruction as a texture
 *     instruction, put TexSrcUnit into S[1].src.reg, call tex_dst() and
 *     tex_src(), adjust the S[0] swizzle for TXP and cube targets, and
 *     commit with next_ins().
 * NOTE(review): several lines (branch bodies, else/break lines and the
 * statements that commit the intermediate cube-map instructions) are not
 * visible in this view; two block comments in the cube-map section have
 * no visible terminator.
 */
3424 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
)
3426 GLboolean src_const
;
3427 GLboolean need_barrier
= GL_FALSE
;
3431 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].SrcReg
[0].File
)
3433 case PROGRAM_CONSTANT
:
3434 case PROGRAM_LOCAL_PARAM
:
3435 case PROGRAM_ENV_PARAM
:
3436 case PROGRAM_STATE_VAR
:
3437 src_const
= GL_TRUE
;
3439 case PROGRAM_TEMPORARY
:
3442 src_const
= GL_FALSE
;
3446 if (GL_TRUE
== src_const
)
3448 if ( GL_FALSE
== mov_temp(pAsm
, 0) )
3450 need_barrier
= GL_TRUE
;
/* TXP path: tmp.w = RECIP_IEEE(src.w), then tmp.xyz = src * tmp.w. */
3453 if (pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
3455 GLuint tmp
= gethelpr(pAsm
);
3456 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
3457 pAsm
->D
.dst
.math
= 1;
3458 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3459 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3460 pAsm
->D
.dst
.reg
= tmp
;
3461 pAsm
->D
.dst
.writew
= 1;
3463 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3467 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
3468 if( GL_FALSE
== next_ins(pAsm
) )
3473 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3474 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3475 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3476 pAsm
->D
.dst
.reg
= tmp
;
3477 pAsm
->D
.dst
.writex
= 1;
3478 pAsm
->D
.dst
.writey
= 1;
3479 pAsm
->D
.dst
.writez
= 1;
3480 pAsm
->D
.dst
.writew
= 0;
3482 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3486 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3487 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3488 pAsm
->S
[1].src
.reg
= tmp
;
3489 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_W
);
3491 if( GL_FALSE
== next_ins(pAsm
) )
3496 pAsm
->aArgSubst
[1] = tmp
;
3497 need_barrier
= GL_TRUE
;
/* Cube-map path: coordinates are pre-processed through two helper
 * registers before sampling. */
3500 if (pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
3502 GLuint tmp1
= gethelpr(pAsm
);
3503 GLuint tmp2
= gethelpr(pAsm
);
3505 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3506 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_CUBE
;
3507 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3508 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3509 pAsm
->D
.dst
.reg
= tmp1
;
3510 nomask_PVSDST(&(pAsm
->D
.dst
));
3512 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3517 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3522 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
);
3523 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_X
, SQ_SEL_Z
, SQ_SEL_Z
);
3525 if( GL_FALSE
== next_ins(pAsm
) )
3530 /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently
3531 * have to do explicit instruction
3533 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3534 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3535 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3536 pAsm
->D
.dst
.reg
= tmp1
;
3537 pAsm
->D
.dst
.writez
= 1;
3539 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3540 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3541 pAsm
->S
[0].src
.reg
= tmp1
;
3542 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3543 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
3544 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3548 /* tmp1.z = RCP_e(|tmp1.z|) */
3549 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
3550 pAsm
->D
.dst
.math
= 1;
3551 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3552 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3553 pAsm
->D
.dst
.reg
= tmp1
;
3554 pAsm
->D
.dst
.writez
= 1;
3556 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3557 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3558 pAsm
->S
[0].src
.reg
= tmp1
;
3559 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Z
;
3563 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
3564 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
3565 * muladd has no writemask, have to use another temp
3566 * also no support for imm constants, so add 1 here
3568 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3569 pAsm
->D
.dst
.op3
= 1;
3570 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3571 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3572 pAsm
->D
.dst
.reg
= tmp2
;
3574 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3575 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3576 pAsm
->S
[0].src
.reg
= tmp1
;
3577 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3578 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3579 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3580 pAsm
->S
[1].src
.reg
= tmp1
;
3581 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
);
3582 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
3583 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
3584 pAsm
->S
[2].src
.reg
= tmp1
;
3585 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_1
);
3589 /* ADD the remaining .5 */
3590 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3591 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3592 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3593 pAsm
->D
.dst
.reg
= tmp2
;
3594 pAsm
->D
.dst
.writex
= 1;
3595 pAsm
->D
.dst
.writey
= 1;
3596 pAsm
->D
.dst
.writez
= 0;
3597 pAsm
->D
.dst
.writew
= 0;
3599 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3600 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3601 pAsm
->S
[0].src
.reg
= tmp2
;
3602 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3603 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3604 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3605 pAsm
->S
[1].src
.reg
= 252; // SQ_ALU_SRC_0_5
3606 noswizzle_PVSSRC(&(pAsm
->S
[1].src
));
3610 /* tmp1.xy = temp2.xy */
3611 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3612 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3613 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3614 pAsm
->D
.dst
.reg
= tmp1
;
3615 pAsm
->D
.dst
.writex
= 1;
3616 pAsm
->D
.dst
.writey
= 1;
3617 pAsm
->D
.dst
.writez
= 0;
3618 pAsm
->D
.dst
.writew
= 0;
3620 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3621 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3622 pAsm
->S
[0].src
.reg
= tmp2
;
3623 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3626 pAsm
->aArgSubst
[1] = tmp1
;
3627 need_barrier
= GL_TRUE
;
/* Select the sampling opcode and fill in the texture instruction itself. */
3631 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXB
)
3633 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE_L
;
3637 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE
;
3640 pAsm
->is_tex
= GL_TRUE
;
3641 if ( GL_TRUE
== need_barrier
)
3643 pAsm
->need_tex_barrier
= GL_TRUE
;
3645 // Set src1 to tex unit id
3646 pAsm
->S
[1].src
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcUnit
;
3647 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3649 //No sw info from mesa compiler, so hard code here.
3650 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
3651 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
3652 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
3653 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
3655 if( GL_FALSE
== tex_dst(pAsm
) )
3660 if( GL_FALSE
== tex_src(pAsm
) )
3665 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
3667 /* hopefully did swizzles before */
3668 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3671 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
3673 /* SAMPLE dst, tmp.yxwy, CUBE */
3674 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Y
;
3675 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
3676 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_W
;
3677 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_Y
;
3680 if ( GL_FALSE
== next_ins(pAsm
) )
3688 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
)
3692 if( GL_FALSE
== checkop2(pAsm
) )
3697 tmp
= gethelpr(pAsm
);
3699 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3701 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3702 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3703 pAsm
->D
.dst
.reg
= tmp
;
3704 nomask_PVSDST(&(pAsm
->D
.dst
));
3706 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3711 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3716 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
3717 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
3719 if( GL_FALSE
== next_ins(pAsm
) )
3724 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3725 pAsm
->D
.dst
.op3
= 1;
3727 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
3729 tmp
= gethelpr(pAsm
);
3731 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3732 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3733 pAsm
->D
.dst
.reg
= tmp
;
3735 nomask_PVSDST(&(pAsm
->D
.dst
));
3739 if( GL_FALSE
== assemble_dst(pAsm
) )
3745 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3750 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3755 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
3756 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
3758 // result1 + (neg) result0
3759 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
),ADDR_ABSOLUTE
);
3760 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
3761 pAsm
->S
[2].src
.reg
= tmp
;
3763 neg_PVSSRC(&(pAsm
->S
[2].src
));
3764 noswizzle_PVSSRC(&(pAsm
->S
[2].src
));
3766 if( GL_FALSE
== next_ins(pAsm
) )
3772 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
3774 if( GL_FALSE
== assemble_dst(pAsm
) )
3779 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3781 // Use tmp as source
3782 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3783 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3784 pAsm
->S
[0].src
.reg
= tmp
;
3786 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3787 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3789 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_EXPORT: takes the assembler state and returns a GLboolean.
 * NOTE(review): the body is not visible in this view.
 */
3798 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
)
/*
 * assemble_IF: takes the assembler state and returns a GLboolean.
 * NOTE(review): the body is not visible in this view.
 */
3803 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
)
/*
 * assemble_ENDIF: takes the assembler state and returns a GLboolean.
 * NOTE(review): the body is not visible in this view.
 */
3808 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
)
/*
 * AssembleInstr: top-level driver.  Stores pILInst in the assembler state,
 * then walks the uiNumberInsts IL instructions in order; for each one it
 * records the index in uiCurInst and dispatches on pILInst[i].Opcode to
 * the matching assemble_*() routine, comparing the result with GL_FALSE.
 * Some opcodes only report "Not yet implemented" through radeon_error(),
 * and an unrecognised opcode reports "internal: unknown instruction".
 * After one of the assemble_MOV() dispatches, if the following
 * instruction exists, is not OPCODE_END and is a texture instruction
 * (IsTex), its nDstDep entry in pInstDeps is set to i+1.
 * NOTE(review): the case labels and the bodies of the failure checks are
 * not visible in this view.
 */
3813 GLboolean
AssembleInstr(GLuint uiNumberInsts
,
3814 struct prog_instruction
*pILInst
,
3815 r700_AssemblerBase
*pR700AsmCode
)
3819 pR700AsmCode
->pILInst
= pILInst
;
3820 for(i
=0; i
<uiNumberInsts
; i
++)
3822 pR700AsmCode
->uiCurInst
= i
;
3824 switch (pILInst
[i
].Opcode
)
3827 if ( GL_FALSE
== assemble_ABS(pR700AsmCode
) )
3832 if ( GL_FALSE
== assemble_ADD(pR700AsmCode
) )
3837 if ( GL_FALSE
== assemble_ARL(pR700AsmCode
) )
3841 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
3842 //if ( GL_FALSE == assemble_BAD("ARR") )
3847 if ( GL_FALSE
== assemble_CMP(pR700AsmCode
) )
3851 if ( GL_FALSE
== assemble_COS(pR700AsmCode
) )
3858 if ( GL_FALSE
== assemble_DOT(pR700AsmCode
) )
3863 if ( GL_FALSE
== assemble_DST(pR700AsmCode
) )
3868 if ( GL_FALSE
== assemble_EX2(pR700AsmCode
) )
3872 radeon_error("Not yet implemented instruction OPCODE_EXP \n");
3873 //if ( GL_FALSE == assemble_BAD("EXP") )
3875 break; // approx of EX2
3878 if ( GL_FALSE
== assemble_FLR(pR700AsmCode
) )
3882 // if ( GL_FALSE == assemble_FLR_INT() )
3887 if ( GL_FALSE
== assemble_FRC(pR700AsmCode
) )
3892 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
) )
3896 if ( GL_FALSE
== assemble_LG2(pR700AsmCode
) )
3900 if ( GL_FALSE
== assemble_LIT(pR700AsmCode
) )
3904 if ( GL_FALSE
== assemble_LRP(pR700AsmCode
) )
3908 radeon_error("Not yet implemented instruction OPCODE_LOG \n");
3909 //if ( GL_FALSE == assemble_BAD("LOG") )
3911 break; // approx of LG2
3914 if ( GL_FALSE
== assemble_MAD(pR700AsmCode
) )
3918 if ( GL_FALSE
== assemble_MAX(pR700AsmCode
) )
3922 if ( GL_FALSE
== assemble_MIN(pR700AsmCode
) )
3927 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
3931 if ( GL_FALSE
== assemble_MUL(pR700AsmCode
) )
3936 if ( GL_FALSE
== assemble_POW(pR700AsmCode
) )
3940 if ( GL_FALSE
== assemble_RCP(pR700AsmCode
) )
3944 if ( GL_FALSE
== assemble_RSQ(pR700AsmCode
) )
3948 if ( GL_FALSE
== assemble_SIN(pR700AsmCode
) )
3952 if ( GL_FALSE
== assemble_SCS(pR700AsmCode
) )
3957 if ( GL_FALSE
== assemble_SGE(pR700AsmCode
) )
3961 if ( GL_FALSE
== assemble_SLT(pR700AsmCode
) )
3966 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
3971 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
3977 if( (i
+1)<uiNumberInsts
)
3979 if(OPCODE_END
!= pILInst
[i
+1].Opcode
)
3981 if( GL_TRUE
== IsTex(pILInst
[i
+1].Opcode
) )
3983 pR700AsmCode
->pInstDeps
[i
+1].nDstDep
= i
+1; //=1?
3993 if ( GL_FALSE
== assemble_TEX(pR700AsmCode
) )
3998 if ( GL_FALSE
== assemble_XPD(pR700AsmCode
) )
4003 if ( GL_FALSE
== assemble_IF(pR700AsmCode
) )
4007 radeon_error("Not yet implemented instruction OPCODE_ELSE \n");
4008 //if ( GL_FALSE == assemble_BAD("ELSE") )
4012 if ( GL_FALSE
== assemble_ENDIF(pR700AsmCode
) )
4016 //case OPCODE_EXPORT:
4017 // if ( GL_FALSE == assemble_EXPORT() )
4022 //pR700AsmCode->uiCurInst = i;
4023 //This is a reminder that if a later export contains a depth/stencil
4024 //export, we need a mov to re-arrange the DST channel; instead of using a
4025 //pseudo inst, we will use this end inst to do it.
4029 radeon_error("internal: unknown instruction\n");
/*
 * Process_Export: fills in one export clause of the control-flow program.
 *
 * Visible parameters: pAsm (assembler state), export_starting_index
 * (offset added to the array base), export_count (number of registers in
 * the burst), starting_register_number (first GPR exported) and
 * is_depth_export.  The body also reads a value named `type` whose
 * declaration is not visible in this view.
 *
 * Steps:
 *   - check_current_clause() is called for CF_EMPTY_CLAUSE and then for
 *     CF_EXPORT_CLAUSE, after which cf_current_export_clause_ptr is used;
 *   - m_Word0.f.type is set from `type`, and array_base is chosen per
 *     export type: SQ_CF_PIXEL_Z for a depth export, SQ_CF_PIXEL_MRT0 +
 *     index for other pixel exports, SQ_CF_POS_0 + index, or 0x0 + index
 *     for parameters; an unknown type is reported with radeon_error();
 *   - rw_gpr, rw_rel, index_gpr, elem_size, burst_count (export_count - 1),
 *     end_of_program, valid_pixel_mode, cf_inst (SQ_CF_INST_EXPORT),
 *     whole_quad_mode and barrier are filled in;
 *   - when export_count == 1 the write mask taken from pucOutMask decides,
 *     per channel, between the identity selector and SQ_SEL_MASK;
 *     the identity swizzle X/Y/Z/W is assigned as well;
 *   - the clause is remembered in cf_last_export_ptr.
 * NOTE(review): the switch header, some case labels, the else lines and
 * the statement inside the depth-export mask check are not visible here.
 */
4037 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
4039 GLuint export_starting_index
,
4040 GLuint export_count
,
4041 GLuint starting_register_number
,
4042 GLboolean is_depth_export
)
4044 unsigned char ucWriteMask
;
4046 check_current_clause(pAsm
, CF_EMPTY_CLAUSE
);
4047 check_current_clause(pAsm
, CF_EXPORT_CLAUSE
); //alloc the cf_current_export_clause_ptr
4049 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.type
= type
;
4053 case SQ_EXPORT_PIXEL
:
4054 if(GL_TRUE
== is_depth_export
)
4056 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_Z
;
4060 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_MRT0
+ export_starting_index
;
4065 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_POS_0
+ export_starting_index
;
4068 case SQ_EXPORT_PARAM
:
4069 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= 0x0 + export_starting_index
;
4073 radeon_error("Unknown export type: %d\n", type
);
4078 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_gpr
= starting_register_number
;
4080 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_rel
= SQ_ABSOLUTE
;
4081 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.index_gpr
= 0x0;
4082 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.elem_size
= 0x3;
4084 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.burst_count
= (export_count
- 1);
4085 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4086 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4087 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT
; // _DONE
4088 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4089 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Single-register export: honour the per-channel write mask. */
4091 if (export_count
== 1)
4093 ucWriteMask
= pAsm
->pucOutMask
[starting_register_number
- pAsm
->starting_export_register_number
];
4094 /* exports Z as a float into Red channel */
4095 if (GL_TRUE
== is_depth_export
)
4098 if( (ucWriteMask
& 0x1) != 0)
4100 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
4104 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_MASK
;
4106 if( ((ucWriteMask
>>1) & 0x1) != 0)
4108 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
4112 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_MASK
;
4114 if( ((ucWriteMask
>>2) & 0x1) != 0)
4116 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
4120 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_MASK
;
4122 if( ((ucWriteMask
>>3) & 0x1) != 0)
4124 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
4128 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_MASK
;
4133 // This should only be used if all components for all registers have been written
4134 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
4135 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
4136 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
4137 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
4140 pAsm
->cf_last_export_ptr
= pAsm
->cf_current_export_clause_ptr
;
4145 GLboolean
Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase
*pAsm
, BITS depth_channel_select
)
4147 gl_inst_opcode Opcode_save
= pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
; //Should be OPCODE_END
4148 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= OPCODE_MOV
;
4150 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
4152 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4154 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4155 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4156 pAsm
->D
.dst
.reg
= pAsm
->depth_export_register_number
;
4158 pAsm
->D
.dst
.writex
= 1; // depth goes in R channel for HW
4160 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4161 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4162 pAsm
->S
[0].src
.reg
= pAsm
->depth_export_register_number
;
4164 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), depth_channel_select
);
4166 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4168 if( GL_FALSE
== next_ins(pAsm
) )
4173 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= Opcode_save
;
/*
 * Process_Fragment_Exports: emits the export clauses of a fragment
 * program.
 *   - If depth_export_register_number is >= 0, the depth value is first
 *     moved into place with
 *     Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z).
 *   - For each of FRAG_RESULT_COLOR and FRAG_RESULT_DEPTH whose bit is set
 *     in OutputsWritten, Process_Export() is called with the matching
 *     uiFP_OutputMap entry.
 *   - If export_count is still 0, a pixel export of register 0 is emitted
 *     anyway (see the comment below).
 *   - The last export clause, if any, is turned into
 *     SQ_CF_INST_EXPORT_DONE with end_of_program set.
 * NOTE(review): most Process_Export() arguments and the bodies of the
 * failure checks are not visible in this view.
 */
4178 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
,
4179 GLbitfield OutputsWritten
)
4182 GLuint export_count
= 0;
4184 if(pR700AsmCode
->depth_export_register_number
>= 0)
4186 if( GL_FALSE
== Move_Depth_Exports_To_Correct_Channels(pR700AsmCode
, SQ_SEL_Z
) ) // depth
4192 unBit
= 1 << FRAG_RESULT_COLOR
;
4193 if(OutputsWritten
& unBit
)
4195 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4199 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_COLOR
],
4206 unBit
= 1 << FRAG_RESULT_DEPTH
;
4207 if(OutputsWritten
& unBit
)
4209 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4213 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_DEPTH
],
4220 /* Need to export something, otherwise we'll hang
4221 * results are undefined anyway */
4222 if(export_count
== 0)
4224 Process_Export(pR700AsmCode
, SQ_EXPORT_PIXEL
, 0, 1, 0, GL_FALSE
);
4227 if(pR700AsmCode
->cf_last_export_ptr
!= NULL
)
4229 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
4230 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
/*
 * Process_Vertex_Exports: emits the export clauses of a vertex program.
 *   - VERT_RESULT_HPOS, when written, is exported first using its
 *     ucVP_OutputMap entry, and the resulting clause is marked
 *     SQ_CF_INST_EXPORT_DONE.
 *   - number_of_exports is written back from the local export_count.
 *   - VERT_RESULT_COL0, COL1, FOGC and each VERT_RESULT_TEX0 + i that is
 *     written are exported in that order, advancing export_starting_index
 *     after each one.
 *   - The last export clause is marked SQ_CF_INST_EXPORT_DONE; a further
 *     Process_Export() call starting at starting_export_register_number is
 *     followed by forcing its swizzle to (0, 0, 0, 1).
 *   - end_of_program is set on the final clause.
 * NOTE(review): most Process_Export() arguments, the loop header over i
 * and the conditions around the final export are not visible in this
 * view.
 */
4236 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
,
4237 GLbitfield OutputsWritten
)
4242 GLuint export_starting_index
= 0;
4243 GLuint export_count
= pR700AsmCode
->number_of_exports
;
4245 unBit
= 1 << VERT_RESULT_HPOS
;
4246 if(OutputsWritten
& unBit
)
4248 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4250 export_starting_index
,
4252 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_HPOS
],
4260 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
4263 pR700AsmCode
->number_of_exports
= export_count
;
4265 unBit
= 1 << VERT_RESULT_COL0
;
4266 if(OutputsWritten
& unBit
)
4268 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4270 export_starting_index
,
4272 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL0
],
4278 export_starting_index
++;
4281 unBit
= 1 << VERT_RESULT_COL1
;
4282 if(OutputsWritten
& unBit
)
4284 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4286 export_starting_index
,
4288 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL1
],
4294 export_starting_index
++;
4297 unBit
= 1 << VERT_RESULT_FOGC
;
4298 if(OutputsWritten
& unBit
)
4300 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4302 export_starting_index
,
4304 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_FOGC
],
4310 export_starting_index
++;
4315 unBit
= 1 << (VERT_RESULT_TEX0
+ i
);
4316 if(OutputsWritten
& unBit
)
4318 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4320 export_starting_index
,
4322 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_TEX0
+ i
],
4328 export_starting_index
++;
4332 // At least one param should be exported
4335 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
4339 if( GL_FALSE
== Process_Export(pR700AsmCode
,
4343 pR700AsmCode
->starting_export_register_number
,
4349 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_0
;
4350 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_0
;
4351 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_0
;
4352 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_1
;
4353 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
4356 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
/*
 * Clean_Up_Assembler: releases the pucOutMask and pInstDeps members of the
 * assembler state with FREE().
 * NOTE(review): the end of the function is not visible in this view.
 */
4361 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
)
4363 FREE(pR700AsmCode
->pucOutMask
);
4364 FREE(pR700AsmCode
->pInstDeps
);