2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35 #include "shader/prog_parameter.h"
37 #include "radeon_debug.h"
38 #include "r600_context.h"
40 #include "r700_assembler.h"
42 #define USE_CF_FOR_CONTINUE_BREAK 1
43 #define USE_CF_FOR_POP_AFTER 1
/*
 * noise1_insts: table of 12 prog_instruction records.  The opcodes, in order,
 * are BGNSUB, MOV, MOV, MOV, SGT, IF, MOV, RET, ENDIF, MOV, RET, ENDSUB.
 * NOTE(review): presumably a pre-built instruction stream for a "noise1"
 * subroutine; the meaning of each numeric field depends on the layout of
 * struct prog_instruction, which is declared elsewhere -- confirm there.
 * NOTE(review): the end of the initializer is not visible in this view.
 */
45 struct prog_instruction noise1_insts
[12] = {
46 {OPCODE_BGNSUB
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
47 {OPCODE_MOV
, {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
48 {OPCODE_MOV
, {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
49 {OPCODE_MOV
, {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
50 {OPCODE_SGT
, {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
51 {OPCODE_IF
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0},
52 {OPCODE_MOV
, {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
53 {OPCODE_RET
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
54 {OPCODE_ENDIF
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
55 {OPCODE_MOV
, {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
56 {OPCODE_RET
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
57 {OPCODE_ENDSUB
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
/*
 * noise1_const: 2x4 table of floats.  Only the first row
 * {0.3, 0.9, 0.5, 0.3} is visible in this view.
 * NOTE(review): presumably the constants referenced by noise1_insts --
 * confirm against the code that consumes this table.
 */
59 float noise1_const
[2][4] = {
60 {0.300000f
, 0.900000f
, 0.500000f
, 0.300000f
}
/*
 * noise1_presub: COMPILED_SUB descriptor.
 * NOTE(review): the initializer contents are not visible in this view;
 * presumably it bundles noise1_insts and noise1_const -- confirm against the
 * COMPILED_SUB declaration.
 */
63 COMPILED_SUB noise1_presub
= {
78 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
)
80 return pPVSDST
->addrmode0
| ((BITS
)pPVSDST
->addrmode1
<< 1);
83 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
)
85 pPVSDST
->addrmode0
= addrmode
& 1;
86 pPVSDST
->addrmode1
= (addrmode
>> 1) & 1;
89 void nomask_PVSDST(PVSDST
* pPVSDST
)
91 pPVSDST
->writex
= pPVSDST
->writey
= pPVSDST
->writez
= pPVSDST
->writew
= 1;
94 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
)
96 return pPVSSRC
->addrmode0
| ((BITS
)pPVSSRC
->addrmode1
<< 1);
99 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
)
101 pPVSSRC
->addrmode0
= addrmode
& 1;
102 pPVSSRC
->addrmode1
= (addrmode
>> 1) & 1;
/*
 * setswizzle_PVSSRC: store the selector swz into the swizzle of *pPVSSRC.
 * The only statement visible in this view assigns swz to swizzlew.
 * NOTE(review): three lines between the signature and that assignment are
 * not visible; presumably they assign swz to swizzlex/y/z as well -- confirm.
 */
106 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
)
111 pPVSSRC
->swizzlew
= swz
;
114 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
)
116 pPVSSRC
->swizzlex
= SQ_SEL_X
;
117 pPVSSRC
->swizzley
= SQ_SEL_Y
;
118 pPVSSRC
->swizzlez
= SQ_SEL_Z
;
119 pPVSSRC
->swizzlew
= SQ_SEL_W
;
/*
 * swizzleagain_PVSSRC: apply a second swizzle (x, y, z, w) on top of the
 * swizzle already stored in *pPVSSRC.
 *
 * Each of the four selectors is remapped through the operand's current
 * swizzle: SQ_SEL_X becomes swizzlex, SQ_SEL_Y becomes swizzley, SQ_SEL_Z
 * becomes swizzlez and SQ_SEL_W becomes swizzlew.  The four remapped values
 * are then written back to swizzlex..swizzlew.
 *
 * NOTE(review): the switch headers, break statements and any default arms
 * are not visible in this view; presumably each group switches on its own
 * selector and other selector values pass through unchanged -- confirm.
 */
123 swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
)
/* Remap the x selector through the current swizzle. */
127 case SQ_SEL_X
: x
= pPVSSRC
->swizzlex
;
129 case SQ_SEL_Y
: x
= pPVSSRC
->swizzley
;
131 case SQ_SEL_Z
: x
= pPVSSRC
->swizzlez
;
133 case SQ_SEL_W
: x
= pPVSSRC
->swizzlew
;
/* Remap the y selector through the current swizzle. */
140 case SQ_SEL_X
: y
= pPVSSRC
->swizzlex
;
142 case SQ_SEL_Y
: y
= pPVSSRC
->swizzley
;
144 case SQ_SEL_Z
: y
= pPVSSRC
->swizzlez
;
146 case SQ_SEL_W
: y
= pPVSSRC
->swizzlew
;
/* Remap the z selector through the current swizzle. */
153 case SQ_SEL_X
: z
= pPVSSRC
->swizzlex
;
155 case SQ_SEL_Y
: z
= pPVSSRC
->swizzley
;
157 case SQ_SEL_Z
: z
= pPVSSRC
->swizzlez
;
159 case SQ_SEL_W
: z
= pPVSSRC
->swizzlew
;
/* Remap the w selector through the current swizzle. */
166 case SQ_SEL_X
: w
= pPVSSRC
->swizzlex
;
168 case SQ_SEL_Y
: w
= pPVSSRC
->swizzley
;
170 case SQ_SEL_Z
: w
= pPVSSRC
->swizzlez
;
172 case SQ_SEL_W
: w
= pPVSSRC
->swizzlew
;
/* Store the composed swizzle; done last so all lookups above read the old one. */
177 pPVSSRC
->swizzlex
= x
;
178 pPVSSRC
->swizzley
= y
;
179 pPVSSRC
->swizzlez
= z
;
180 pPVSSRC
->swizzlew
= w
;
/*
 * neg_PVSSRC: operates on the negate state of source operand *pPVSSRC.
 * NOTE(review): the body is not visible in this view; presumably it sets
 * negx, negy, negz and negw -- confirm.
 */
183 void neg_PVSSRC(PVSSRC
* pPVSSRC
)
/*
 * noneg_PVSSRC: operates on the negate state of source operand *pPVSSRC.
 * NOTE(review): the body is not visible in this view; presumably it clears
 * negx, negy, negz and negw -- confirm.
 */
191 void noneg_PVSSRC(PVSSRC
* pPVSSRC
)
199 // negate argument (for SUB instead of ADD and alike)
200 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
)
202 pPVSSRC
->negx
= !pPVSSRC
->negx
;
203 pPVSSRC
->negy
= !pPVSSRC
->negy
;
204 pPVSSRC
->negz
= !pPVSSRC
->negz
;
205 pPVSSRC
->negw
= !pPVSSRC
->negw
;
/*
 * zerocomp_PVSSRC: for component index c (0 = x, 1 = y, 2 = z, 3 = w), set
 * that component's swizzle field of *pPVSSRC to SQ_SEL_0 and clear its
 * negate flag.
 * NOTE(review): SQ_SEL_0 is presumably the "constant 0" selector -- confirm.
 * NOTE(review): the enclosing switch header and any default arm are not
 * visible in this view; presumably the switch is on c and other values of c
 * are ignored -- confirm.
 */
208 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
212 case 0: pPVSSRC
->swizzlex
= SQ_SEL_0
; pPVSSRC
->negx
= 0; break;
213 case 1: pPVSSRC
->swizzley
= SQ_SEL_0
; pPVSSRC
->negy
= 0; break;
214 case 2: pPVSSRC
->swizzlez
= SQ_SEL_0
; pPVSSRC
->negz
= 0; break;
215 case 3: pPVSSRC
->swizzlew
= SQ_SEL_0
; pPVSSRC
->negw
= 0; break;
/*
 * onecomp_PVSSRC: for component index c (0 = x, 1 = y, 2 = z, 3 = w), set
 * that component's swizzle field of *pPVSSRC to SQ_SEL_1 and clear its
 * negate flag.
 * NOTE(review): SQ_SEL_1 is presumably the "constant 1" selector -- confirm.
 * NOTE(review): the enclosing switch header and any default arm are not
 * visible in this view; presumably the switch is on c and other values of c
 * are ignored -- confirm.
 */
220 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
224 case 0: pPVSSRC
->swizzlex
= SQ_SEL_1
; pPVSSRC
->negx
= 0; break;
225 case 1: pPVSSRC
->swizzley
= SQ_SEL_1
; pPVSSRC
->negy
= 0; break;
226 case 2: pPVSSRC
->swizzlez
= SQ_SEL_1
; pPVSSRC
->negz
= 0; break;
227 case 3: pPVSSRC
->swizzlew
= SQ_SEL_1
; pPVSSRC
->negw
= 0; break;
232 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
)
234 return (pOutVTXFmt0
->point_size
|
235 pOutVTXFmt0
->edge_flag
|
236 pOutVTXFmt0
->rta_index
|
237 pOutVTXFmt0
->kill_flag
|
238 pOutVTXFmt0
->viewport_index
);
/*
 * is_depth_component_exported: returns the bitwise OR of fields of the
 * fragment output format *pFPOutFmt.  The operands visible in this view are
 * depth, stencil_ref and coverage_to_mask.
 * NOTE(review): one line of the expression (between stencil_ref and
 * coverage_to_mask) is not visible here, so the full operand list must be
 * confirmed against the original file.
 */
241 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
)
243 return (pFPOutFmt
->depth
|
244 pFPOutFmt
->stencil_ref
|
246 pFPOutFmt
->coverage_to_mask
);
/*
 * is_reduction_opcode: inspects the assembled destination word *dest.
 * The visible tests are: dst.op3 == 0, and dst.opcode being one of
 * SQ_OP2_INST_DOT4, SQ_OP2_INST_DOT4_IEEE or SQ_OP2_INST_CUBE.
 * NOTE(review): the return statements are not visible in this view;
 * presumably GL_TRUE when both tests hold and GL_FALSE otherwise -- confirm.
 */
249 GLboolean
is_reduction_opcode(PVSDWORD
* dest
)
/* Only consulted when op3 is 0. */
251 if (dest
->dst
.op3
== 0)
253 if ( (dest
->dst
.opcode
== SQ_OP2_INST_DOT4
|| dest
->dst
.opcode
== SQ_OP2_INST_DOT4_IEEE
|| dest
->dst
.opcode
== SQ_OP2_INST_CUBE
) )
/*
 * GetSurfaceFormat: choose an FMT_* surface format for a vertex element of
 * GL type eType with nChannels components, and optionally report its size.
 *
 * format starts as FMT_INVALID and uiElemSize as 0.  The visible assignments
 * pick, per channel count, FMT_8.. under GL_UNSIGNED_BYTE, FMT_16.. under
 * GL_UNSIGNED_SHORT, FMT_32.. under GL_UNSIGNED_INT, and the FMT_32.._FLOAT
 * family in two further groups.  When pClient_size is non-NULL it receives
 * uiElemSize * nChannels.
 *
 * NOTE(review): the switch headers, several case labels, the uiElemSize
 * assignments and the return statement are not visible in this view;
 * presumably the function returns format -- confirm.
 */
261 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
)
263 GLuint format
= FMT_INVALID
;
264 GLuint uiElemSize
= 0;
269 case GL_UNSIGNED_BYTE
:
274 format
= FMT_8
; break;
276 format
= FMT_8_8
; break;
278 format
= FMT_8_8_8
; break;
280 format
= FMT_8_8_8_8
; break;
286 case GL_UNSIGNED_SHORT
:
292 format
= FMT_16
; break;
294 format
= FMT_16_16
; break;
296 format
= FMT_16_16_16
; break;
298 format
= FMT_16_16_16_16
; break;
304 case GL_UNSIGNED_INT
:
310 format
= FMT_32
; break;
312 format
= FMT_32_32
; break;
314 format
= FMT_32_32_32
; break;
316 format
= FMT_32_32_32_32
; break;
/* NOTE(review): case label for this group is not visible in this view. */
327 format
= FMT_32_FLOAT
; break;
329 format
= FMT_32_32_FLOAT
; break;
331 format
= FMT_32_32_32_FLOAT
; break;
333 format
= FMT_32_32_32_32_FLOAT
; break;
/* NOTE(review): case label for this group is not visible in this view. */
343 format
= FMT_32_FLOAT
; break;
345 format
= FMT_32_32_FLOAT
; break;
347 format
= FMT_32_32_32_FLOAT
; break;
349 format
= FMT_32_32_32_32_FLOAT
; break;
356 //GL_ASSERT_NO_CASE();
/* The size output is optional: callers may pass NULL. */
359 if(NULL
!= pClient_size
)
361 *pClient_size
= uiElemSize
* nChannels
;
/*
 * r700GetNumOperands: look up the operand count of an ALU instruction from
 * its opcode.  The visible code is two groups of SQ_OP2_INST_* case labels
 * and a default arm that logs "Need instruction operand number for %x." via
 * radeon_error.
 *
 * NOTE(review): the use of nIsOp3, the switch header and all return
 * statements are not visible in this view.  Presumably the first group
 * (ADD .. CUBE) returns 2 and the second group (MOV .. COS) returns 1 --
 * confirm against the original file.
 */
367 unsigned int r700GetNumOperands(GLuint opcode
, GLuint nIsOp3
)
/* First group of opcodes (shares one result). */
376 case SQ_OP2_INST_ADD
:
377 case SQ_OP2_INST_KILLE
:
378 case SQ_OP2_INST_KILLGT
:
379 case SQ_OP2_INST_KILLGE
:
380 case SQ_OP2_INST_KILLNE
:
381 case SQ_OP2_INST_MUL
:
382 case SQ_OP2_INST_MAX
:
383 case SQ_OP2_INST_MIN
:
384 //case SQ_OP2_INST_MAX_DX10:
385 //case SQ_OP2_INST_MIN_DX10:
386 case SQ_OP2_INST_SETE
:
387 case SQ_OP2_INST_SETNE
:
388 case SQ_OP2_INST_SETGT
:
389 case SQ_OP2_INST_SETGE
:
390 case SQ_OP2_INST_PRED_SETE
:
391 case SQ_OP2_INST_PRED_SETGT
:
392 case SQ_OP2_INST_PRED_SETGE
:
393 case SQ_OP2_INST_PRED_SETNE
:
394 case SQ_OP2_INST_DOT4
:
395 case SQ_OP2_INST_DOT4_IEEE
:
396 case SQ_OP2_INST_CUBE
:
/* Second group of opcodes (shares one result). */
399 case SQ_OP2_INST_MOV
:
400 case SQ_OP2_INST_MOVA_FLOOR
:
401 case SQ_OP2_INST_FRACT
:
402 case SQ_OP2_INST_FLOOR
:
403 case SQ_OP2_INST_TRUNC
:
404 case SQ_OP2_INST_EXP_IEEE
:
405 case SQ_OP2_INST_LOG_CLAMPED
:
406 case SQ_OP2_INST_LOG_IEEE
:
407 case SQ_OP2_INST_RECIP_IEEE
:
408 case SQ_OP2_INST_RECIPSQRT_IEEE
:
409 case SQ_OP2_INST_FLT_TO_INT
:
410 case SQ_OP2_INST_SIN
:
411 case SQ_OP2_INST_COS
:
/* Any opcode not listed above is reported as an error. */
414 default: radeon_error(
415 "Need instruction operand number for %x.\n", opcode
);
/*
 * Init_r700_AssemblerBase: put the assembler context *pAsm into its initial
 * state for a shader of type spt.
 *
 * Visible effects: calls Init_R700_Shader(pShader) and attaches pShader as
 * pAsm->pR700Shader; records spt; clears all current-clause pointers and
 * sets the clause type to CF_EMPTY_CLAUSE; zeroes export counters and
 * register/constant bookkeeping (uUsedConsts starts at 256); sets the export
 * register numbers to -1; clears the input-usage flags and the per-attribute
 * vfetch pointer array; resets subroutine/caller arrays; points CALLSTACK[0]
 * at the shader's lstCFInstructions and makes it the active CF list; resets
 * the pre-sub bookkeeping.
 *
 * NOTE(review): some lines of the body, including the declaration of i and
 * the return statement, are not visible in this view.
 */
421 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
)
425 Init_R700_Shader(pShader
);
426 pAsm
->pR700Shader
= pShader
;
427 pAsm
->currentShaderType
= spt
;
429 pAsm
->cf_last_export_ptr
= NULL
;
431 pAsm
->cf_current_export_clause_ptr
= NULL
;
432 pAsm
->cf_current_alu_clause_ptr
= NULL
;
433 pAsm
->cf_current_tex_clause_ptr
= NULL
;
434 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
435 pAsm
->cf_current_cf_clause_ptr
= NULL
;
437 // No clause has been created yet
438 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
440 pAsm
->number_of_colorandz_exports
= 0;
441 pAsm
->number_of_exports
= 0;
442 pAsm
->number_of_export_opcodes
= 0;
444 pAsm
->alu_x_opcode
= 0;
453 pAsm
->uLastPosUpdate
= 0;
/* Clears fp_stOutFmt0 by writing 0 through a BITS pointer to it. */
455 *(BITS
*) &pAsm
->fp_stOutFmt0
= 0;
459 pAsm
->number_used_registers
= 0;
460 pAsm
->uUsedConsts
= 256;
464 pAsm
->uBoolConsts
= 0;
465 pAsm
->uIntConsts
= 0;
470 pAsm
->fc_stack
[0].type
= FC_NONE
;
475 pAsm
->aArgSubst
[3] = (-1);
/* -1 marks "no register assigned" for every export slot below. */
479 for (i
=0; i
<NUMBER_OF_OUTPUT_COLORS
; i
++)
481 pAsm
->color_export_register_number
[i
] = (-1);
485 pAsm
->depth_export_register_number
= (-1);
486 pAsm
->stencil_export_register_number
= (-1);
487 pAsm
->coverage_to_mask_export_register_number
= (-1);
488 pAsm
->mask_export_register_number
= (-1);
490 pAsm
->starting_export_register_number
= 0;
491 pAsm
->starting_vfetch_register_number
= 0;
492 pAsm
->starting_temp_register_number
= 0;
493 pAsm
->uFirstHelpReg
= 0;
/* No shader input has been seen yet. */
495 pAsm
->input_position_is_used
= GL_FALSE
;
496 pAsm
->input_normal_is_used
= GL_FALSE
;
498 for (i
=0; i
<NUMBER_OF_INPUT_COLORS
; i
++)
500 pAsm
->input_color_is_used
[ i
] = GL_FALSE
;
503 for (i
=0; i
<NUMBER_OF_TEXTURE_UNITS
; i
++)
505 pAsm
->input_texture_unit_is_used
[ i
] = GL_FALSE
;
508 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
510 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
513 pAsm
->number_of_inputs
= 0;
515 pAsm
->is_tex
= GL_FALSE
;
516 pAsm
->need_tex_barrier
= GL_FALSE
;
/* Subroutine and caller tables start empty. */
519 pAsm
->unSubArraySize
= 0;
520 pAsm
->unSubArrayPointer
= 0;
521 pAsm
->callers
= NULL
;
522 pAsm
->unCallerArraySize
= 0;
523 pAsm
->unCallerArrayPointer
= 0;
/* CALLSTACK[0] uses the shader's own CF instruction list. */
526 pAsm
->CALLSTACK
[0].FCSP_BeforeEntry
= 0;
527 pAsm
->CALLSTACK
[0].plstCFInstructions_local
528 = &(pAsm
->pR700Shader
->lstCFInstructions
);
530 pAsm
->CALLSTACK
[0].max
= 0;
531 pAsm
->CALLSTACK
[0].current
= 0;
533 SetActiveCFlist(pAsm
->pR700Shader
, pAsm
->CALLSTACK
[0].plstCFInstructions_local
);
537 pAsm
->presubs
= NULL
;
538 pAsm
->unPresubArraySize
= 0;
539 pAsm
->unNumPresub
= 0;
540 pAsm
->unCurNumILInsts
= 0;
542 pAsm
->unVetTexBits
= 0;
/*
 * IsTex: tests whether Opcode is one of OPCODE_TEX, OPCODE_TXP, OPCODE_TXB,
 * OPCODE_DDX or OPCODE_DDY.
 * NOTE(review): the return statements are not visible in this view;
 * presumably GL_TRUE for those opcodes and GL_FALSE otherwise -- confirm.
 */
547 GLboolean
IsTex(gl_inst_opcode Opcode
)
549 if( (OPCODE_TEX
==Opcode
) || (OPCODE_TXP
==Opcode
) || (OPCODE_TXB
==Opcode
) ||
550 (OPCODE_DDX
==Opcode
) || (OPCODE_DDY
==Opcode
) )
/*
 * IsAlu: classifies Opcode.
 * NOTE(review): the body is not visible in this view apart from the TODO
 * below; presumably it reports whether Opcode is handled as an ALU
 * instruction -- confirm.
 */
557 GLboolean
IsAlu(gl_inst_opcode Opcode
)
559 //TODO : more for fc and ex for higher spec.
/*
 * check_current_clause: make new_clause_type the assembler's current clause
 * type.
 *
 * Nothing visible happens unless pAsm->cf_current_clause_type differs from
 * new_clause_type.  In that case the pointer belonging to the old clause
 * type (alu / vtx / tex / export / cf) is reset to NULL, the type is set to
 * CF_EMPTY_CLAUSE, and then the new type is recorded.  For CF_EXPORT_CLAUSE
 * a new R700ControlFlowSXClause is allocated with CALLOC_STRUCT, initialised
 * and appended to the shader with AddCFInstruction; an allocation failure is
 * logged.
 *
 * NOTE(review): several case labels, break statements, the error-reporting
 * calls that own the visible message strings, and all return statements are
 * not visible in this view.  Callers compare the result with GL_FALSE.
 */
567 int check_current_clause(r700_AssemblerBase
* pAsm
,
568 CF_CLAUSE_TYPE new_clause_type
)
570 if (pAsm
->cf_current_clause_type
!= new_clause_type
)
571 { //Close last open clause
572 switch (pAsm
->cf_current_clause_type
)
575 if ( pAsm
->cf_current_alu_clause_ptr
!= NULL
)
577 pAsm
->cf_current_alu_clause_ptr
= NULL
;
581 if ( pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
583 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
587 if ( pAsm
->cf_current_tex_clause_ptr
!= NULL
)
589 pAsm
->cf_current_tex_clause_ptr
= NULL
;
592 case CF_EXPORT_CLAUSE
:
593 if ( pAsm
->cf_current_export_clause_ptr
!= NULL
)
595 pAsm
->cf_current_export_clause_ptr
= NULL
;
598 case CF_OTHER_CLAUSE
:
599 if ( pAsm
->cf_current_cf_clause_ptr
!= NULL
)
601 pAsm
->cf_current_cf_clause_ptr
= NULL
;
604 case CF_EMPTY_CLAUSE
:
608 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
/* The old clause is closed; no clause is open until the switch below. */
612 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
/* Open the requested clause type. */
615 switch (new_clause_type
)
618 pAsm
->cf_current_clause_type
= CF_ALU_CLAUSE
;
621 pAsm
->cf_current_clause_type
= CF_VTX_CLAUSE
;
624 pAsm
->cf_current_clause_type
= CF_TEX_CLAUSE
;
626 case CF_EXPORT_CLAUSE
:
628 R700ControlFlowSXClause
* pR700ControlFlowSXClause
629 = (R700ControlFlowSXClause
*) CALLOC_STRUCT(R700ControlFlowSXClause
);
631 // Add new export instruction to control flow program
632 if (pR700ControlFlowSXClause
!= 0)
634 pAsm
->cf_current_export_clause_ptr
= pR700ControlFlowSXClause
;
635 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause
);
636 AddCFInstruction( pAsm
->pR700Shader
,
637 (R700ControlFlowInstruction
*)pR700ControlFlowSXClause
);
642 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
645 pAsm
->cf_current_clause_type
= CF_EXPORT_CLAUSE
;
648 case CF_EMPTY_CLAUSE
:
650 case CF_OTHER_CLAUSE
:
651 pAsm
->cf_current_clause_type
= CF_OTHER_CLAUSE
;
655 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
/*
 * add_cf_instruction: start a new generic control-flow instruction.
 *
 * Switches the assembler to CF_OTHER_CLAUSE via check_current_clause, then
 * allocates a R700ControlFlowGenericClause with CALLOC_STRUCT, stores it in
 * pAsm->cf_current_cf_clause_ptr, initialises it and appends it to the
 * shader with AddCFInstruction.  Allocation failure is logged.
 *
 * NOTE(review): the return statements are not visible in this view.
 * NOTE(review): the error text mentions "VFetch" although this path
 * allocates a generic CF clause -- looks like a copy-paste from
 * add_vfetch_instruction; left untouched here.
 */
663 GLboolean
add_cf_instruction(r700_AssemblerBase
* pAsm
)
665 if(GL_FALSE
== check_current_clause(pAsm
, CF_OTHER_CLAUSE
))
670 pAsm
->cf_current_cf_clause_ptr
=
671 (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
673 if (pAsm
->cf_current_cf_clause_ptr
!= NULL
)
675 Init_R700ControlFlowGenericClause(pAsm
->cf_current_cf_clause_ptr
);
676 AddCFInstruction( pAsm
->pR700Shader
,
677 (R700ControlFlowInstruction
*)pAsm
->cf_current_cf_clause_ptr
);
681 radeon_error("Could not allocate a new VFetch CF instruction.\n");
/*
 * add_vfetch_instruction: append a vertex-fetch instruction to the shader,
 * opening a new VTX control-flow clause when needed.
 *
 * After switching to CF_VTX_CLAUSE, a new clause is allocated when there is
 * no current VTX clause or when the current clause's count has reached
 * GetCFMaxInstructions(...) - 1.  A new clause is initialised, appended with
 * AddCFInstruction, given its m_Word1 fields (cf_inst = SQ_CF_INST_VTX,
 * cond = SQ_CF_COND_ACTIVE, barrier = 1, count = 0, the rest 0) and linked
 * to vertex_instruction_ptr.  Otherwise the existing clause's count is
 * incremented.  The instruction itself is then added with AddVTXInstruction.
 *
 * NOTE(review): braces, else keywords and return statements are not visible
 * in this view, so the exact branch nesting must be confirmed.
 */
688 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
689 R700VertexInstruction
* vertex_instruction_ptr
)
691 if( GL_FALSE
== check_current_clause(pAsm
, CF_VTX_CLAUSE
) )
/* Need a fresh clause: none open yet, or the open one is full. */
696 if( pAsm
->cf_current_vtx_clause_ptr
== NULL
||
697 ( (pAsm
->cf_current_vtx_clause_ptr
!= NULL
) &&
698 (pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_vtx_clause_ptr
->m_ShaderInstType
)-1)
701 // Create new Vfetch control flow instruction for this new clause
702 pAsm
->cf_current_vtx_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
704 if (pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
706 Init_R700ControlFlowGenericClause(pAsm
->cf_current_vtx_clause_ptr
);
707 AddCFInstruction( pAsm
->pR700Shader
,
708 (R700ControlFlowInstruction
*)pAsm
->cf_current_vtx_clause_ptr
);
712 radeon_error("Could not allocate a new VFetch CF instruction.\n");
/* Fill in the control-flow word of the new clause. */
716 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
717 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
718 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
719 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
= 0x0;
720 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
721 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
722 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_VTX
;
723 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
724 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
726 LinkVertexInstruction(pAsm
->cf_current_vtx_clause_ptr
, vertex_instruction_ptr
);
/* Existing clause: just account for one more instruction. */
730 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
++;
733 AddVTXInstruction(pAsm
->pR700Shader
, vertex_instruction_ptr
);
/*
 * add_tex_instruction: append a texture instruction to the shader, opening a
 * new TEX control-flow clause when needed.
 *
 * After switching to CF_TEX_CLAUSE, a new clause is allocated when there is
 * no current TEX clause or when the current clause's count has reached
 * GetCFMaxInstructions(...) - 1.  A new clause is initialised, appended with
 * AddCFInstruction and given its m_Word1 fields (cf_inst = SQ_CF_INST_TEX,
 * cond = SQ_CF_COND_ACTIVE, barrier = 0, the rest 0).  Otherwise the
 * existing clause's count is incremented.  The barrier bit is set when the
 * current instruction has a destination dependency (nDstDep > -1) or
 * pAsm->need_tex_barrier is GL_TRUE.  The first instruction of a clause is
 * linked to it in both directions, and the instruction is added with
 * AddTEXInstruction.
 *
 * NOTE(review): braces, else keywords and return statements are not visible
 * in this view, so the exact branch nesting must be confirmed.
 */
738 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
739 R700TextureInstruction
* tex_instruction_ptr
)
741 if ( GL_FALSE
== check_current_clause(pAsm
, CF_TEX_CLAUSE
) )
/* Need a fresh clause: none open yet, or the open one is full. */
746 if ( pAsm
->cf_current_tex_clause_ptr
== NULL
||
747 ( (pAsm
->cf_current_tex_clause_ptr
!= NULL
) &&
748 (pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_tex_clause_ptr
->m_ShaderInstType
)-1)
751 // new tex cf instruction for this new clause
752 pAsm
->cf_current_tex_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
754 if (pAsm
->cf_current_tex_clause_ptr
!= NULL
)
756 Init_R700ControlFlowGenericClause(pAsm
->cf_current_tex_clause_ptr
);
757 AddCFInstruction( pAsm
->pR700Shader
,
758 (R700ControlFlowInstruction
*)pAsm
->cf_current_tex_clause_ptr
);
762 radeon_error("Could not allocate a new TEX CF instruction.\n");
/* Fill in the control-flow word of the new clause. */
766 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
767 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
768 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
770 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
771 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
772 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_TEX
;
773 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
774 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x0; //0x1;
/* Existing clause: just account for one more instruction. */
778 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
++;
781 // If this clause contains any TEX instruction that is dependent on a previous instruction,
782 // set the barrier bit
783 if( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) || pAsm
->need_tex_barrier
== GL_TRUE
)
785 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* The first instruction added to a clause becomes its linked instruction. */
788 if(NULL
== pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
)
790 pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
= tex_instruction_ptr
;
791 tex_instruction_ptr
->m_pLinkedGenericClause
= pAsm
->cf_current_tex_clause_ptr
;
794 AddTEXInstruction(pAsm
->pR700Shader
, tex_instruction_ptr
);
/*
 * assemble_vfetch_instruction: build (or rebuild) the vertex-fetch
 * instruction for one vertex attribute.
 *
 * The instruction cached in pAsm->vfetch_instruction_ptr_array[gl_client_id]
 * is reused when present; otherwise a R700VertexGenericFetch is allocated
 * with CALLOC_STRUCT and initialised.  GetSurfaceFormat supplies data_format
 * and the element size in bytes.  The fetch words are then filled in:
 * buffer_id = gl_client_id, destination select per component (components
 * beyond number_of_elements read SQ_SEL_0, except w which reads SQ_SEL_1),
 * dst_gpr = destination_register, use_const_fields = 1.  A newly allocated
 * instruction is added with add_vfetch_instruction and stored in the cache.
 *
 * NOTE(review): the gl_client_id parameter, the data_format declaration, the
 * mini-fetch branch body and all return statements are not visible in this
 * view.
 */
799 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
801 GLuint destination_register
,
802 GLuint number_of_elements
,
803 GLenum dataElementType
,
804 VTX_FETCH_METHOD
* pFetchMethod
)
806 GLuint client_size_inbyte
;
808 GLuint mega_fetch_count
;
809 GLuint is_mega_fetch_flag
;
811 R700VertexGenericFetch
* vfetch_instruction_ptr
;
812 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
= pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
];
/* Allocate only when this attribute has no cached fetch instruction. */
814 if (assembled_vfetch_instruction_ptr
== NULL
)
816 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
817 if (vfetch_instruction_ptr
== NULL
)
821 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
825 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
828 data_format
= GetSurfaceFormat(dataElementType
, number_of_elements
, &client_size_inbyte
);
830 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
/* Mega-fetch settings. */
836 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
837 is_mega_fetch_flag
= 0x1;
838 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
841 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
842 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
843 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
845 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= gl_client_id
;
846 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
847 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
848 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
849 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
/* Components the attribute does not supply read SQ_SEL_0 (w: SQ_SEL_1). */
851 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (number_of_elements
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
852 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (number_of_elements
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
853 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (number_of_elements
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
854 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (number_of_elements
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
856 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
858 // Destination register
859 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
860 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
862 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
863 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
865 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
/* Only a newly allocated instruction is added to the shader and cached. */
867 if (assembled_vfetch_instruction_ptr
== NULL
)
869 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
874 if (pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] != NULL
)
880 pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] = vfetch_instruction_ptr
;
/*
 * assemble_vfetch_instruction2: build (or rebuild) the vertex-fetch
 * instruction for attribute slot `element`, with explicit format handling.
 *
 * The instruction cached in pAsm->vfetch_instruction_ptr_array[element] is
 * reused when present; otherwise a R700VertexGenericFetch is allocated with
 * CALLOC_STRUCT and initialised.  GetSurfaceFormat(type, size, ...) supplies
 * data_format and the element size in bytes.  When format == GL_BGRA the x
 * and z destination selects are swapped (x reads SQ_SEL_Z, z reads
 * SQ_SEL_X); otherwise the selects are the identity.  Components beyond
 * `size` read SQ_SEL_0, except w which reads SQ_SEL_1.  format_comp_all is
 * set to signed or unsigned and num_format_all to NORM (when normalize is
 * GL_TRUE) or INT.  A newly allocated instruction is added with
 * add_vfetch_instruction and stored in the cache.
 *
 * NOTE(review): the parameters between destination_register and
 * pFetchMethod (element, type, size, format, normalize appear in the body),
 * the condition selecting signed vs unsigned, the mini-fetch branch body and
 * all return statements are not visible in this view.
 */
887 GLboolean
assemble_vfetch_instruction2(r700_AssemblerBase
* pAsm
,
888 GLuint destination_register
,
895 VTX_FETCH_METHOD
* pFetchMethod
)
897 GLuint client_size_inbyte
;
899 GLuint mega_fetch_count
;
900 GLuint is_mega_fetch_flag
;
902 R700VertexGenericFetch
* vfetch_instruction_ptr
;
903 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
904 = pAsm
->vfetch_instruction_ptr_array
[element
];
/* Allocate only when this slot has no cached fetch instruction. */
906 if (assembled_vfetch_instruction_ptr
== NULL
)
908 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
909 if (vfetch_instruction_ptr
== NULL
)
913 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
917 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
920 data_format
= GetSurfaceFormat(type
, size
, &client_size_inbyte
);
922 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
/* Mega-fetch settings. */
928 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
929 is_mega_fetch_flag
= 0x1;
930 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
933 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
934 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
935 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
937 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= element
;
938 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
939 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
940 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
941 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
/* BGRA input: swap the x and z selects so the register receives RGBA order. */
943 if(format
== GL_BGRA
)
945 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_Z
;
946 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
947 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_X
;
948 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
/* Any other format: identity selects. */
952 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
953 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
954 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
955 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
959 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
960 vfetch_instruction_ptr
->m_Word1
.f
.data_format
= data_format
;
961 vfetch_instruction_ptr
->m_Word2
.f
.endian_swap
= SQ_ENDIAN_NONE
;
/* NOTE(review): the condition choosing signed vs unsigned is not visible. */
965 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_SIGNED
;
969 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_UNSIGNED
;
972 if(GL_TRUE
== normalize
)
974 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_NORM
;
978 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_INT
;
981 // Destination register
982 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
983 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
985 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
986 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
988 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
/* Only a newly allocated instruction is added to the shader and cached. */
990 if (assembled_vfetch_instruction_ptr
== NULL
)
992 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
997 if (pAsm
->vfetch_instruction_ptr_array
[element
] != NULL
)
1003 pAsm
->vfetch_instruction_ptr_array
[element
] = vfetch_instruction_ptr
;
/*
 * cleanup_vfetch_instructions: drop the assembler's vertex-fetch state.
 *
 * Sets the current clause type to CF_EMPTY_CLAUSE, clears the current VTX
 * clause pointer, sets every entry of vfetch_instruction_ptr_array
 * (VERT_ATTRIB_MAX entries) to NULL and calls cleanup_vfetch_shaderinst on
 * the shader.
 *
 * NOTE(review): the declaration of i and the return statement are not
 * visible in this view.
 */
1010 GLboolean
cleanup_vfetch_instructions(r700_AssemblerBase
* pAsm
)
1013 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
1014 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
1016 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
1018 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
1021 cleanup_vfetch_shaderinst(pAsm
->pR700Shader
);
/*
 * gethelpr: hand out a helper (scratch) register number.
 *
 * r captures pAsm->uHelpReg on entry; if uHelpReg exceeds
 * number_used_registers, number_used_registers is raised to uHelpReg.
 *
 * NOTE(review): one line between the capture of r and the comparison, and
 * the return statement, are not visible in this view; presumably uHelpReg is
 * advanced there and r is returned -- confirm.
 */
1026 GLuint
gethelpr(r700_AssemblerBase
* pAsm
)
1028 GLuint r
= pAsm
->uHelpReg
;
1030 if (pAsm
->uHelpReg
> pAsm
->number_used_registers
)
1032 pAsm
->number_used_registers
= pAsm
->uHelpReg
;
1036 void resethelpr(r700_AssemblerBase
* pAsm
)
1038 pAsm
->uHelpReg
= pAsm
->uFirstHelpReg
;
/*
 * checkop_init: reset the argument-substitution table.  All four entries of
 * pAsm->aArgSubst are set to -1; assemble_src only substitutes a register
 * when the entry is >= 0, so -1 means "no substitution".
 * NOTE(review): one line before the assignments is not visible in this view.
 */
1041 void checkop_init(r700_AssemblerBase
* pAsm
)
1044 pAsm
->aArgSubst
[0] =
1045 pAsm
->aArgSubst
[1] =
1046 pAsm
->aArgSubst
[2] =
1047 pAsm
->aArgSubst
[3] = -1;
/*
 * mov_temp: copy source operand number `src` of the current instruction into
 * a helper register and record the substitution.
 *
 * A helper register is obtained with gethelpr.  A SQ_OP2_INST_MOV is set up
 * with that register as an absolute, unmasked temporary destination; the
 * source is assembled into S[0] with assemble_src and then forced to no
 * swizzle and no negate; next_ins emits the instruction.  Finally
 * aArgSubst[1 + src] is set to the helper register so that later calls to
 * assemble_src for this operand use it instead.
 *
 * NOTE(review): the return statements are not visible in this view; callers
 * compare the result with GL_FALSE.
 */
1050 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
)
1052 GLuint tmp
= gethelpr(pAsm
);
1054 //mov src to temp helper gpr.
1055 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
1057 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1059 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1060 pAsm
->D
.dst
.reg
= tmp
;
1062 nomask_PVSDST(&(pAsm
->D
.dst
));
1064 if( GL_FALSE
== assemble_src(pAsm
, src
, 0) )
/* Copy the operand verbatim: drop any swizzle or negate it carried. */
1069 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
1070 noneg_PVSSRC(&(pAsm
->S
[0].src
));
1072 if( GL_FALSE
== next_ins(pAsm
) )
/* From now on this operand is read from the helper register. */
1077 pAsm
->aArgSubst
[1 + src
] = tmp
;
/*
 * checkop1: operand check for one-source instructions.
 * NOTE(review): the body is not visible in this view; presumably it only
 * resets the substitution state and reports success -- confirm.
 */
1082 GLboolean
checkop1(r700_AssemblerBase
* pAsm
)
/*
 * checkop2: operand check for two-source instructions.
 *
 * Each source of the current IL instruction is classified as "constant" when
 * its register file is PROGRAM_UNIFORM, PROGRAM_CONSTANT,
 * PROGRAM_LOCAL_PARAM, PROGRAM_ENV_PARAM or PROGRAM_STATE_VAR.  When both
 * sources are constants with different Index values, source 1 is copied to a
 * helper register with mov_temp.
 * NOTE(review): presumably because one ALU instruction cannot read two
 * different constant registers -- confirm against the hardware docs.
 * NOTE(review): else keywords and return statements are not visible in this
 * view.
 */
1088 GLboolean
checkop2(r700_AssemblerBase
* pAsm
)
1090 GLboolean bSrcConst
[2];
1091 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
/* Classify source 0. */
1095 if( (pILInst
->SrcReg
[0].File
== PROGRAM_UNIFORM
) ||
1096 (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
1097 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
1098 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
1099 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
1101 bSrcConst
[0] = GL_TRUE
;
1105 bSrcConst
[0] = GL_FALSE
;
/* Classify source 1. */
1107 if( (pILInst
->SrcReg
[1].File
== PROGRAM_UNIFORM
) ||
1108 (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1109 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1110 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1111 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1113 bSrcConst
[1] = GL_TRUE
;
1117 bSrcConst
[1] = GL_FALSE
;
/* Two different constants: move the second one into a helper register. */
1120 if( (bSrcConst
[0] == GL_TRUE
) && (bSrcConst
[1] == GL_TRUE
) )
1122 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1124 if( GL_FALSE
== mov_temp(pAsm
, 1) )
/*
 * checkop3: operand check for three-source instructions.
 *
 * Each source of the current IL instruction is classified as "constant" when
 * its register file is PROGRAM_UNIFORM, PROGRAM_CONSTANT,
 * PROGRAM_LOCAL_PARAM, PROGRAM_ENV_PARAM or PROGRAM_STATE_VAR.  Then:
 *   - all three constant: sources 1 and 2 are copied with mov_temp;
 *   - 0 and 1 constant with different Index: source 1 is copied;
 *   - 0 and 2 constant with different Index: source 2 is copied;
 *   - 1 and 2 constant with different Index: source 2 is copied.
 * NOTE(review): in the all-three case the copies are made without comparing
 * Index values, at least as far as this view shows.
 * NOTE(review): else keywords for the classification and all return
 * statements are not visible in this view.
 */
1134 GLboolean
checkop3(r700_AssemblerBase
* pAsm
)
1136 GLboolean bSrcConst
[3];
1137 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
/* Classify source 0. */
1141 if( (pILInst
->SrcReg
[0].File
== PROGRAM_UNIFORM
) ||
1142 (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
1143 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
1144 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
1145 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
1147 bSrcConst
[0] = GL_TRUE
;
1151 bSrcConst
[0] = GL_FALSE
;
/* Classify source 1. */
1153 if( (pILInst
->SrcReg
[1].File
== PROGRAM_UNIFORM
) ||
1154 (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1155 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1156 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1157 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1159 bSrcConst
[1] = GL_TRUE
;
1163 bSrcConst
[1] = GL_FALSE
;
/* Classify source 2. */
1165 if( (pILInst
->SrcReg
[2].File
== PROGRAM_UNIFORM
) ||
1166 (pILInst
->SrcReg
[2].File
== PROGRAM_CONSTANT
) ||
1167 (pILInst
->SrcReg
[2].File
== PROGRAM_LOCAL_PARAM
) ||
1168 (pILInst
->SrcReg
[2].File
== PROGRAM_ENV_PARAM
) ||
1169 (pILInst
->SrcReg
[2].File
== PROGRAM_STATE_VAR
) )
1171 bSrcConst
[2] = GL_TRUE
;
1175 bSrcConst
[2] = GL_FALSE
;
/* All three are constants: copy sources 1 and 2. */
1178 if( (GL_TRUE
== bSrcConst
[0]) &&
1179 (GL_TRUE
== bSrcConst
[1]) &&
1180 (GL_TRUE
== bSrcConst
[2]) )
1182 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1186 if( GL_FALSE
== mov_temp(pAsm
, 2) )
/* Sources 0 and 1 are constants. */
1193 else if( (GL_TRUE
== bSrcConst
[0]) &&
1194 (GL_TRUE
== bSrcConst
[1]) )
1196 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1198 if( GL_FALSE
== mov_temp(pAsm
, 1) )
/* Sources 0 and 2 are constants. */
1206 else if ( (GL_TRUE
== bSrcConst
[0]) &&
1207 (GL_TRUE
== bSrcConst
[2]) )
1209 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[2].Index
)
1211 if( GL_FALSE
== mov_temp(pAsm
, 2) )
/* Sources 1 and 2 are constants. */
1219 else if( (GL_TRUE
== bSrcConst
[1]) &&
1220 (GL_TRUE
== bSrcConst
[2]) )
1222 if(pILInst
->SrcReg
[1].Index
!= pILInst
->SrcReg
[2].Index
)
1224 if( GL_FALSE
== mov_temp(pAsm
, 2) )
/*
 * assemble_src: translate source operand `src` of the current IL instruction
 * into assembler source slot pAsm->S[fld].
 *
 * If aArgSubst[1+src] is >= 0 the operand is read from that temporary
 * register (absolute addressing).  Otherwise the register file decides:
 *   - PROGRAM_TEMPORARY: temporary register Index +
 *     starting_temp_register_number;
 *   - PROGRAM_CONSTANT / LOCAL_PARAM / ENV_PARAM / STATE_VAR / UNIFORM:
 *     constant register Index, addressed relative to A0 when RelAddr is 1;
 *     a negative Index is not supported, warns once and uses register 0;
 *   - input registers: mapped through uiFP_AttributeMap or
 *     ucVP_AttributeMap depending on currentShaderType;
 *   - anything else: logged as an invalid source argument type.
 * The swizzle is unpacked from Swizzle (3 bits per component) and the negate
 * flags from Negate (1 bit per component).
 *
 * NOTE(review): the `src` and `fld` parameters, the else keyword separating
 * the substitution path from the switch, some case labels, break statements
 * and all return statements are not visible in this view.
 */
1236 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
1240 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
/* A substitution recorded by mov_temp takes priority over the IL operand. */
1247 if(pAsm
->aArgSubst
[1+src
] >= 0)
1249 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1250 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1251 pAsm
->S
[fld
].src
.reg
= pAsm
->aArgSubst
[1+src
];
1255 switch (pILInst
->SrcReg
[src
].File
)
1257 case PROGRAM_TEMPORARY
:
1258 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1259 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1260 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
+ pAsm
->starting_temp_register_number
;
1262 case PROGRAM_CONSTANT
:
1263 case PROGRAM_LOCAL_PARAM
:
1264 case PROGRAM_ENV_PARAM
:
1265 case PROGRAM_STATE_VAR
:
1266 case PROGRAM_UNIFORM
:
/* Relative addressing goes through A0. */
1267 if (1 == pILInst
->SrcReg
[src
].RelAddr
)
1269 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_RELATIVE_A0
);
1273 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1276 pAsm
->S
[fld
].src
.rtype
= SRC_REG_CONSTANT
;
1277 if(pILInst
->SrcReg
[src
].Index
< 0)
1279 WARN_ONCE("Negative register offsets not supported yet!\n");
1280 pAsm
->S
[fld
].src
.reg
= 0;
1284 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
;
/* NOTE(review): case label for this arm is not visible in this view. */
1288 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1289 pAsm
->S
[fld
].src
.rtype
= SRC_REG_INPUT
;
1290 switch (pAsm
->currentShaderType
)
1293 pAsm
->S
[fld
].src
.reg
= pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1296 pAsm
->S
[fld
].src
.reg
= pAsm
->ucVP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1301 radeon_error("Invalid source argument type : %d \n", pILInst
->SrcReg
[src
].File
);
/* Unpack the swizzle: three bits per component, x in the low bits. */
1306 pAsm
->S
[fld
].src
.swizzlex
= pILInst
->SrcReg
[src
].Swizzle
& 0x7;
1307 pAsm
->S
[fld
].src
.swizzley
= (pILInst
->SrcReg
[src
].Swizzle
>> 3) & 0x7;
1308 pAsm
->S
[fld
].src
.swizzlez
= (pILInst
->SrcReg
[src
].Swizzle
>> 6) & 0x7;
1309 pAsm
->S
[fld
].src
.swizzlew
= (pILInst
->SrcReg
[src
].Swizzle
>> 9) & 0x7;
/* Unpack the negate mask: one bit per component, x in bit 0. */
1311 pAsm
->S
[fld
].src
.negx
= pILInst
->SrcReg
[src
].Negate
& 0x1;
1312 pAsm
->S
[fld
].src
.negy
= (pILInst
->SrcReg
[src
].Negate
>> 1) & 0x1;
1313 pAsm
->S
[fld
].src
.negz
= (pILInst
->SrcReg
[src
].Negate
>> 2) & 0x1;
1314 pAsm
->S
[fld
].src
.negw
= (pILInst
->SrcReg
[src
].Negate
>> 3) & 0x1;
/*
 * assemble_dst: translate the destination of the current IL instruction into
 * pAsm->D.dst.
 *
 * The register file decides the destination:
 *   - PROGRAM_TEMPORARY: temporary register Index +
 *     starting_temp_register_number;
 *   - PROGRAM_ADDRESS: DST_REG_A0, register 0;
 *   - PROGRAM_OUTPUT: DST_REG_OUT, mapped through uiFP_OutputMap or
 *     ucVP_OutputMap depending on currentShaderType;
 *   - anything else: logged as an invalid destination.
 * All cases use absolute addressing.  The write mask is unpacked from
 * WriteMask (one bit per component, x in bit 0) and D2.dst2.SaturateMode is
 * set to 1 when SaturateMode == SATURATE_ZERO_ONE, else 0.
 *
 * NOTE(review): break statements, some case labels, the else keyword for the
 * saturate test and all return statements are not visible in this view.
 */
1319 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
)
1321 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1322 switch (pILInst
->DstReg
.File
)
1324 case PROGRAM_TEMPORARY
:
1325 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1326 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1327 pAsm
->D
.dst
.reg
= pILInst
->DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1329 case PROGRAM_ADDRESS
:
1330 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1331 pAsm
->D
.dst
.rtype
= DST_REG_A0
;
1332 pAsm
->D
.dst
.reg
= 0;
1334 case PROGRAM_OUTPUT
:
1335 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1336 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
/* Output slots are remapped per shader type. */
1337 switch (pAsm
->currentShaderType
)
1340 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1343 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1348 radeon_error("Invalid destination output argument type\n");
/* Unpack the write mask: one bit per component, x in bit 0. */
1352 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1353 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1354 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1355 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1357 if(pILInst
->SaturateMode
== SATURATE_ZERO_ONE
)
1359 pAsm
->D2
.dst2
.SaturateMode
= 1;
1363 pAsm
->D2
.dst2
.SaturateMode
= 0;
1369 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
)
1371 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1373 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
1375 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1376 pAsm
->D
.dst
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1378 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1380 else if(PROGRAM_OUTPUT
== pILInst
->DstReg
.File
)
1382 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1383 switch (pAsm
->currentShaderType
)
1386 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1389 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1393 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1397 radeon_error("Invalid destination output argument type\n");
1401 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1402 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1403 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1404 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1409 GLboolean
tex_src(r700_AssemblerBase
*pAsm
)
1411 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1413 GLboolean bValidTexCoord
= GL_FALSE
;
1415 if(pAsm
->aArgSubst
[1] >= 0)
1417 bValidTexCoord
= GL_TRUE
;
1418 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1419 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1420 pAsm
->S
[0].src
.reg
= pAsm
->aArgSubst
[1];
1424 switch (pILInst
->SrcReg
[0].File
) {
1425 case PROGRAM_UNIFORM
:
1426 case PROGRAM_CONSTANT
:
1427 case PROGRAM_LOCAL_PARAM
:
1428 case PROGRAM_ENV_PARAM
:
1429 case PROGRAM_STATE_VAR
:
1431 case PROGRAM_TEMPORARY
:
1432 bValidTexCoord
= GL_TRUE
;
1433 pAsm
->S
[0].src
.reg
= pILInst
->SrcReg
[0].Index
+
1434 pAsm
->starting_temp_register_number
;
1435 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1438 if(SPT_VP
== pAsm
->currentShaderType
)
1440 switch (pILInst
->SrcReg
[0].Index
)
1442 case VERT_ATTRIB_TEX0
:
1443 case VERT_ATTRIB_TEX1
:
1444 case VERT_ATTRIB_TEX2
:
1445 case VERT_ATTRIB_TEX3
:
1446 case VERT_ATTRIB_TEX4
:
1447 case VERT_ATTRIB_TEX5
:
1448 case VERT_ATTRIB_TEX6
:
1449 case VERT_ATTRIB_TEX7
:
1450 bValidTexCoord
= GL_TRUE
;
1451 pAsm
->S
[0].src
.reg
=
1452 pAsm
->ucVP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1453 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
1459 switch (pILInst
->SrcReg
[0].Index
)
1461 case FRAG_ATTRIB_WPOS
:
1462 case FRAG_ATTRIB_COL0
:
1463 case FRAG_ATTRIB_COL1
:
1464 case FRAG_ATTRIB_FOGC
:
1465 case FRAG_ATTRIB_TEX0
:
1466 case FRAG_ATTRIB_TEX1
:
1467 case FRAG_ATTRIB_TEX2
:
1468 case FRAG_ATTRIB_TEX3
:
1469 case FRAG_ATTRIB_TEX4
:
1470 case FRAG_ATTRIB_TEX5
:
1471 case FRAG_ATTRIB_TEX6
:
1472 case FRAG_ATTRIB_TEX7
:
1473 bValidTexCoord
= GL_TRUE
;
1474 pAsm
->S
[0].src
.reg
=
1475 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1476 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
1478 case FRAG_ATTRIB_FACE
:
1479 fprintf(stderr
, "FRAG_ATTRIB_FACE unsupported\n");
1481 case FRAG_ATTRIB_PNTC
:
1482 fprintf(stderr
, "FRAG_ATTRIB_PNTC unsupported\n");
1486 if( (pILInst
->SrcReg
[0].Index
>= FRAG_ATTRIB_VAR0
) ||
1487 (pILInst
->SrcReg
[0].Index
< FRAG_ATTRIB_MAX
) )
1489 bValidTexCoord
= GL_TRUE
;
1490 pAsm
->S
[0].src
.reg
=
1491 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1492 pAsm
->S
[0].src
.rtype
= SRC_REG_INPUT
;
1500 if(GL_TRUE
== bValidTexCoord
)
1502 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1506 radeon_error("Invalid source texcoord for TEX instruction\n");
1510 pAsm
->S
[0].src
.swizzlex
= pILInst
->SrcReg
[0].Swizzle
& 0x7;
1511 pAsm
->S
[0].src
.swizzley
= (pILInst
->SrcReg
[0].Swizzle
>> 3) & 0x7;
1512 pAsm
->S
[0].src
.swizzlez
= (pILInst
->SrcReg
[0].Swizzle
>> 6) & 0x7;
1513 pAsm
->S
[0].src
.swizzlew
= (pILInst
->SrcReg
[0].Swizzle
>> 9) & 0x7;
1515 pAsm
->S
[0].src
.negx
= pILInst
->SrcReg
[0].Negate
& 0x1;
1516 pAsm
->S
[0].src
.negy
= (pILInst
->SrcReg
[0].Negate
>> 1) & 0x1;
1517 pAsm
->S
[0].src
.negz
= (pILInst
->SrcReg
[0].Negate
>> 2) & 0x1;
1518 pAsm
->S
[0].src
.negw
= (pILInst
->SrcReg
[0].Negate
>> 3) & 0x1;
1523 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
)
1525 PVSSRC
* texture_coordinate_source
;
1526 PVSSRC
* texture_unit_source
;
1528 R700TextureInstruction
* tex_instruction_ptr
= (R700TextureInstruction
*) CALLOC_STRUCT(R700TextureInstruction
);
1529 if (tex_instruction_ptr
== NULL
)
1533 Init_R700TextureInstruction(tex_instruction_ptr
);
1535 texture_coordinate_source
= &(pAsm
->S
[0].src
);
1536 texture_unit_source
= &(pAsm
->S
[1].src
);
1538 tex_instruction_ptr
->m_Word0
.f
.tex_inst
= pAsm
->D
.dst
.opcode
;
1539 tex_instruction_ptr
->m_Word0
.f
.bc_frac_mode
= 0x0;
1540 tex_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
1541 tex_instruction_ptr
->m_Word0
.f
.alt_const
= 0;
1543 if(SPT_VP
== pAsm
->currentShaderType
)
1545 tex_instruction_ptr
->m_Word0
.f
.resource_id
= texture_unit_source
->reg
+ VERT_ATTRIB_MAX
;
1546 pAsm
->unVetTexBits
|= 1 << texture_unit_source
->reg
;
1550 tex_instruction_ptr
->m_Word0
.f
.resource_id
= texture_unit_source
->reg
;
1553 tex_instruction_ptr
->m_Word1
.f
.lod_bias
= 0x0;
1555 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_NORMALIZED
;
1556 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_NORMALIZED
;
1557 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_NORMALIZED
;
1558 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_NORMALIZED
;
1560 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1561 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_UNNORMALIZED
;
1562 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_UNNORMALIZED
;
1563 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_UNNORMALIZED
;
1564 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_UNNORMALIZED
;
1567 tex_instruction_ptr
->m_Word2
.f
.offset_x
= 0x0;
1568 tex_instruction_ptr
->m_Word2
.f
.offset_y
= 0x0;
1569 tex_instruction_ptr
->m_Word2
.f
.offset_z
= 0x0;
1570 tex_instruction_ptr
->m_Word2
.f
.sampler_id
= texture_unit_source
->reg
;
1573 if ( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
1574 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
1576 tex_instruction_ptr
->m_Word0
.f
.src_gpr
= texture_coordinate_source
->reg
;
1577 tex_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
1579 tex_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
1580 tex_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
;
1582 tex_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (pAsm
->D
.dst
.writex
? texture_unit_source
->swizzlex
: SQ_SEL_MASK
);
1583 tex_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (pAsm
->D
.dst
.writey
? texture_unit_source
->swizzley
: SQ_SEL_MASK
);
1584 tex_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (pAsm
->D
.dst
.writez
? texture_unit_source
->swizzlez
: SQ_SEL_MASK
);
1585 tex_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (pAsm
->D
.dst
.writew
? texture_unit_source
->swizzlew
: SQ_SEL_MASK
);
1588 tex_instruction_ptr
->m_Word2
.f
.src_sel_x
= texture_coordinate_source
->swizzlex
;
1589 tex_instruction_ptr
->m_Word2
.f
.src_sel_y
= texture_coordinate_source
->swizzley
;
1590 tex_instruction_ptr
->m_Word2
.f
.src_sel_z
= texture_coordinate_source
->swizzlez
;
1591 tex_instruction_ptr
->m_Word2
.f
.src_sel_w
= texture_coordinate_source
->swizzlew
;
1595 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1599 if( GL_FALSE
== add_tex_instruction(pAsm
, tex_instruction_ptr
) )
1607 void initialize(r700_AssemblerBase
*pAsm
)
1609 GLuint cycle
, component
;
1611 for (cycle
=0; cycle
<NUMBER_OF_CYCLES
; cycle
++)
1613 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1615 pAsm
->hw_gpr
[cycle
][component
] = (-1);
1618 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
1620 pAsm
->hw_cfile_addr
[component
] = (-1);
1621 pAsm
->hw_cfile_chan
[component
] = (-1);
/*
 * assemble_alu_src: encode one source operand (select, relative flag,
 * channel and negate bit) into an ALU instruction word.
 *
 * The swizzle and negate bit of pSource are picked for the channel given by
 * scalar_channel_index; source_index chooses which operand slot of the
 * instruction (src0 / src1 in m_Word0, src2 in m_Word1_OP3) receives the
 * result.  Errors are reported through radeon_error().
 */
1625 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
1628 BITS scalar_channel_index
)
1635 //--------------------------------------------------------------------------
1636 // Source for operands src0, src1.
1637 // Values [0,127] correspond to GPR[0..127].
1638 // Values [256,511] correspond to cfile constants c[0..255].
1640 //--------------------------------------------------------------------------
1641 // Other special values are shown in the list below.
1643 // 248 SQ_ALU_SRC_0: special constant 0.0.
1644 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1646 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1647 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1649 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1650 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1652 // 254 SQ_ALU_SRC_PV: previous vector result.
1653 // 255 SQ_ALU_SRC_PS: previous scalar result.
1654 //--------------------------------------------------------------------------
/* Pick the swizzle selector of the channel being assembled; an
   out-of-range channel index yields SQ_SEL_MASK. */
1656 BITS channel_swizzle
;
1657 switch (scalar_channel_index
)
1659 case 0: channel_swizzle
= pSource
->swizzlex
; break;
1660 case 1: channel_swizzle
= pSource
->swizzley
; break;
1661 case 2: channel_swizzle
= pSource
->swizzlez
; break;
1662 case 3: channel_swizzle
= pSource
->swizzlew
; break;
1663 default: channel_swizzle
= SQ_SEL_MASK
; break;
/* Swizzles selecting the constants 0 / 1 map to the inline constants
   SQ_ALU_SRC_0 / SQ_ALU_SRC_1. */
1666 if(channel_swizzle
== SQ_SEL_0
)
1668 src_sel
= SQ_ALU_SRC_0
;
1670 else if (channel_swizzle
== SQ_SEL_1
)
1672 src_sel
= SQ_ALU_SRC_1
;
/* Otherwise the select depends on the register type: temporaries and inputs
   use the register number directly, constants are offset by
   CFILE_REGISTER_OFFSET, literals use SQ_ALU_SRC_LITERAL. */
1676 if ( (pSource
->rtype
== SRC_REG_TEMPORARY
) ||
1677 (pSource
->rtype
== SRC_REG_INPUT
)
1680 src_sel
= pSource
->reg
;
1682 else if (pSource
->rtype
== SRC_REG_CONSTANT
)
1684 src_sel
= pSource
->reg
+ CFILE_REGISTER_OFFSET
;
1686 else if (pSource
->rtype
== SRC_REC_LITERAL
)
1688 src_sel
= SQ_ALU_SRC_LITERAL
;
1692 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1693 source_index
, pSource
->rtype
);
/* Relative flag follows the operand's address mode. */
1698 if( ADDR_ABSOLUTE
== addrmode_PVSSRC(pSource
) )
1700 src_rel
= SQ_ABSOLUTE
;
1704 src_rel
= SQ_RELATIVE
;
/* Translate the swizzle selector into a source channel. */
1707 switch (channel_swizzle
)
1710 src_chan
= SQ_CHAN_X
;
1713 src_chan
= SQ_CHAN_Y
;
1716 src_chan
= SQ_CHAN_Z
;
1719 src_chan
= SQ_CHAN_W
;
1723 // Does not matter since src_sel controls
1724 src_chan
= SQ_CHAN_X
;
1727 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle
);
/* Negate bit of the channel being assembled (0 for an out-of-range index). */
1732 switch (scalar_channel_index
)
1734 case 0: src_neg
= pSource
->negx
; break;
1735 case 1: src_neg
= pSource
->negy
; break;
1736 case 2: src_neg
= pSource
->negz
; break;
1737 case 3: src_neg
= pSource
->negw
; break;
1738 default: src_neg
= 0; break;
/* Store the four values in the operand slot chosen by source_index:
   src0 and src1 live in m_Word0, src2 in m_Word1_OP3. */
1741 switch (source_index
)
1744 alu_instruction_ptr
->m_Word0
.f
.src0_sel
= src_sel
;
1745 alu_instruction_ptr
->m_Word0
.f
.src0_rel
= src_rel
;
1746 alu_instruction_ptr
->m_Word0
.f
.src0_chan
= src_chan
;
1747 alu_instruction_ptr
->m_Word0
.f
.src0_neg
= src_neg
;
1750 alu_instruction_ptr
->m_Word0
.f
.src1_sel
= src_sel
;
1751 alu_instruction_ptr
->m_Word0
.f
.src1_rel
= src_rel
;
1752 alu_instruction_ptr
->m_Word0
.f
.src1_chan
= src_chan
;
1753 alu_instruction_ptr
->m_Word0
.f
.src1_neg
= src_neg
;
1756 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
= src_sel
;
1757 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
= src_rel
;
1758 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
= src_chan
;
1759 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
= src_neg
;
1762 radeon_error("Only three sources allowed in ALU opcodes.\n");
/*
 * add_alu_instruction: append an ALU instruction to the shader, opening a
 * new ALU control-flow clause first when one is needed.
 *
 *   pAsm                     assembler state
 *   alu_instruction_ptr      instruction to append
 *   contiguous_slots_needed  number of slots that must still fit into the
 *                            current clause together with this instruction
 */
1770 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
1771 R700ALUInstruction
* alu_instruction_ptr
,
1772 GLuint contiguous_slots_needed
)
1774 if( GL_FALSE
== check_current_clause(pAsm
, CF_ALU_CLAUSE
) )
/* A new clause is started when a special clause opcode is pending
   (alu_x_opcode != 0), when there is no current clause, or when the current
   clause has no room left for the slots that must stay contiguous. */
1779 if ( pAsm
->alu_x_opcode
!= 0 ||
1780 pAsm
->cf_current_alu_clause_ptr
== NULL
||
1781 ( (pAsm
->cf_current_alu_clause_ptr
!= NULL
) &&
1782 (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-contiguous_slots_needed
-1) )
1786 //new cf inst for this clause
1787 pAsm
->cf_current_alu_clause_ptr
= (R700ControlFlowALUClause
*) CALLOC_STRUCT(R700ControlFlowALUClause
);
1789 // link the new cf to cf segment
1790 if(NULL
!= pAsm
->cf_current_alu_clause_ptr
)
1792 Init_R700ControlFlowALUClause(pAsm
->cf_current_alu_clause_ptr
);
1793 AddCFInstruction( pAsm
->pR700Shader
,
1794 (R700ControlFlowInstruction
*)pAsm
->cf_current_alu_clause_ptr
);
1798 radeon_error("Could not allocate a new ALU CF instruction.\n");
/* Fresh clause: no kcache banks locked, instruction count starts at zero. */
1802 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank0
= 0x0;
1803 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank1
= 0x0;
1804 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_mode0
= SQ_CF_KCACHE_NOP
;
1806 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_mode1
= SQ_CF_KCACHE_NOP
;
1807 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr0
= 0x0;
1808 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr1
= 0x0;
1810 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
= 0x0;
/* A pending alu_x_opcode becomes the clause opcode and is consumed;
   otherwise the clause is a plain SQ_CF_INST_ALU. */
1812 if(pAsm
->alu_x_opcode
!= 0)
1814 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= pAsm
->alu_x_opcode
;
1815 pAsm
->alu_x_opcode
= 0;
1819 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ALU
;
1822 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
1824 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Advance the clause count by half the instruction size reported by
   GetInstructionSize(). */
1828 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
+= (GetInstructionSize(alu_instruction_ptr
->m_ShaderInstType
) / 2);
1831 // If this clause contains any instruction that is forward dependent on a TEX instruction,
1832 // set the whole_quad_mode for this clause
1833 if ( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) )
1835 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x1;
/* When the clause has reached its capacity, mark this instruction as the
   last one of its group. */
1838 if (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-1) )
1840 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
/* The first instruction added to a clause is cross-linked with it. */
1843 if(NULL
== pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
)
1845 pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
= alu_instruction_ptr
;
1846 alu_instruction_ptr
->m_pLinkedALUClause
= pAsm
->cf_current_alu_clause_ptr
;
1849 AddALUInstruction(pAsm
->pR700Shader
, alu_instruction_ptr
);
/*
 * get_src_properties: read back the select, relative flag, channel and
 * negate bit of one source operand of an already encoded ALU instruction.
 *
 * source_index chooses the operand; the values are written through
 * psrc_sel, psrc_rel, psrc_chan and psrc_neg.
 */
1854 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
1861 switch (source_index
)
/* First operand: src0 fields of m_Word0. */
1864 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src0_sel
;
1865 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src0_rel
;
1866 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src0_chan
;
1867 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src0_neg
;
/* Second operand: src1 fields of m_Word0. */
1871 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src1_sel
;
1872 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src1_rel
;
1873 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src1_chan
;
1874 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src1_neg
;
/* Third operand: src2 fields of the OP3 form of word 1. */
1878 *psrc_sel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
;
1879 *psrc_rel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
;
1880 *psrc_chan
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
;
1881 *psrc_neg
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
;
1886 int is_cfile(BITS sel
)
1888 if (sel
> 255 && sel
< 512)
1895 int is_const(BITS sel
)
1901 else if(sel
>= SQ_ALU_SRC_0
&& sel
<= SQ_ALU_SRC_LITERAL
)
1908 int is_gpr(BITS sel
)
1910 if (sel
>= 0 && sel
< 128)
1917 const GLuint BANK_SWIZZLE_VEC
[8] = {SQ_ALU_VEC_210
, //000
1918 SQ_ALU_VEC_120
, //001
1919 SQ_ALU_VEC_102
, //010
1921 SQ_ALU_VEC_201
, //011
1922 SQ_ALU_VEC_012
, //100
1923 SQ_ALU_VEC_021
, //101
1925 SQ_ALU_VEC_012
, //110
1926 SQ_ALU_VEC_012
}; //111
1928 const GLuint BANK_SWIZZLE_SCL
[8] = {SQ_ALU_SCL_210
, //000
1929 SQ_ALU_SCL_122
, //001
1930 SQ_ALU_SCL_122
, //010
1932 SQ_ALU_SCL_221
, //011
1933 SQ_ALU_SCL_212
, //100
1934 SQ_ALU_SCL_122
, //101
1936 SQ_ALU_SCL_122
, //110
1937 SQ_ALU_SCL_122
}; //111
1939 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
1943 int res_match
= (-1);
1944 int res_empty
= (-1);
1948 for (res
=3; res
>=0; res
--)
1950 if(pAsm
->hw_cfile_addr
[ res
] < 0)
1954 else if( (pAsm
->hw_cfile_addr
[res
] == (int)sel
)
1956 (pAsm
->hw_cfile_chan
[ res
] == (int) chan
) )
1964 // Read for this scalar component already reserved, nothing to do here.
1967 else if(res_empty
>= 0)
1969 pAsm
->hw_cfile_addr
[ res_empty
] = sel
;
1970 pAsm
->hw_cfile_chan
[ res_empty
] = chan
;
1974 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1980 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
)
1982 if(pAsm
->hw_gpr
[cycle
][chan
] < 0)
1984 pAsm
->hw_gpr
[cycle
][chan
] = sel
;
1986 else if(pAsm
->hw_gpr
[cycle
][chan
] != (int)sel
)
1988 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1995 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
1999 case SQ_ALU_SCL_210
:
2001 int table
[3] = {2, 1, 0};
2002 *pCycle
= table
[sel
];
2006 case SQ_ALU_SCL_122
:
2008 int table
[3] = {1, 2, 2};
2009 *pCycle
= table
[sel
];
2013 case SQ_ALU_SCL_212
:
2015 int table
[3] = {2, 1, 2};
2016 *pCycle
= table
[sel
];
2020 case SQ_ALU_SCL_221
:
2022 int table
[3] = {2, 2, 1};
2023 *pCycle
= table
[sel
];
2028 radeon_error("Bad Scalar bank swizzle value\n");
2035 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
2039 case SQ_ALU_VEC_012
:
2041 int table
[3] = {0, 1, 2};
2042 *pCycle
= table
[sel
];
2045 case SQ_ALU_VEC_021
:
2047 int table
[3] = {0, 2, 1};
2048 *pCycle
= table
[sel
];
2051 case SQ_ALU_VEC_120
:
2053 int table
[3] = {1, 2, 0};
2054 *pCycle
= table
[sel
];
2057 case SQ_ALU_VEC_102
:
2059 int table
[3] = {1, 0, 2};
2060 *pCycle
= table
[sel
];
2063 case SQ_ALU_VEC_201
:
2065 int table
[3] = {2, 0, 1};
2066 *pCycle
= table
[sel
];
2069 case SQ_ALU_VEC_210
:
2071 int table
[3] = {2, 1, 0};
2072 *pCycle
= table
[sel
];
2076 radeon_error("Bad Vec bank swizzle value\n");
/*
 * check_scalar: choose the scalar bank swizzle for an encoded ALU
 * instruction and reserve the read ports its source operands need.
 *
 * The operand properties are read back from the instruction with
 * get_src_properties(); constant-file operands go through reserve_cfile()
 * and register operands through reserve_gpr().
 */
2084 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
2085 R700ALUInstruction
* alu_instruction_ptr
)
2088 GLuint bank_swizzle
;
2089 GLuint const_count
= 0;
2098 BITS src_sel
[3] = {0,0,0};
2099 BITS src_chan
[3] = {0,0,0};
2100 BITS src_rel
[3] = {0,0,0};
2101 BITS src_neg
[3] = {0,0,0};
2105 GLuint number_of_operands
= r700GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
/* Read back the encoded properties of every operand. */
2107 for (src
=0; src
<number_of_operands
; src
++)
2109 get_src_properties(alu_instruction_ptr
,
/* 3-bit key: bit 2 = src0 is a constant, bit 1 = src1, bit 0 = src2.
   Unused operands keep their initial select of 0. */
2118 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
2119 (is_const( src_sel
[1] ) ? 2 : 0) +
2120 (is_const( src_sel
[2] ) ? 1 : 0) );
2122 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_SCL
[ swizzle_key
];
/* First pass: constant operands.
   NOTE(review): the return value of reserve_cfile() is not checked here,
   unlike in check_vector(). */
2124 for (src
=0; src
<number_of_operands
; src
++)
2126 sel
= src_sel
[src
];
2127 chan
= src_chan
[src
];
2128 rel
= src_rel
[src
];
2129 neg
= src_neg
[src
];
2131 if (is_const( sel
))
2133 // Any constant, including literal and inline constants
2136 if (is_cfile( sel
))
2138 reserve_cfile(pAsm
, sel
, chan
);
/* Second pass: the read cycle of each operand comes from the bank swizzle
   chosen above; reserve_gpr() is called when cycle < const_count. */
2144 for (src
=0; src
<number_of_operands
; src
++)
2146 sel
= src_sel
[src
];
2147 chan
= src_chan
[src
];
2148 rel
= src_rel
[src
];
2149 neg
= src_neg
[src
];
2153 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
2155 if( GL_FALSE
== cycle_for_scalar_bank_swizzle(bank_swizzle
, src
, &cycle
) )
2160 if(cycle
< const_count
)
2162 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
/*
 * check_vector: choose the vector bank swizzle for an encoded ALU
 * instruction and reserve the read ports its source operands need.
 *
 * The operand properties are read back from the instruction with
 * get_src_properties(); the read cycle of each operand comes from
 * cycle_for_vector_bank_swizzle(), register reads are reserved with
 * reserve_gpr() and constant reads with reserve_cfile().
 */
2173 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
2174 R700ALUInstruction
* alu_instruction_ptr
)
2177 GLuint bank_swizzle
;
2178 GLuint const_count
= 0;
2187 BITS src_sel
[3] = {0,0,0};
2188 BITS src_chan
[3] = {0,0,0};
2189 BITS src_rel
[3] = {0,0,0};
2190 BITS src_neg
[3] = {0,0,0};
2194 GLuint number_of_operands
= r700GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
/* Read back the encoded properties of every operand. */
2196 for (src
=0; src
<number_of_operands
; src
++)
2198 get_src_properties(alu_instruction_ptr
,
/* 3-bit key: bit 2 = src0 is a constant, bit 1 = src1, bit 0 = src2.
   Unused operands keep their initial select of 0. */
2207 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
2208 (is_const( src_sel
[1] ) ? 2 : 0) +
2209 (is_const( src_sel
[2] ) ? 1 : 0)
2212 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_VEC
[swizzle_key
];
/* Reserve a read port for each operand. */
2214 for (src
=0; src
<number_of_operands
; src
++)
2216 sel
= src_sel
[src
];
2217 chan
= src_chan
[src
];
2218 rel
= src_rel
[src
];
2219 neg
= src_neg
[src
];
2222 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
2226 if( GL_FALSE
== cycle_for_vector_bank_swizzle(bank_swizzle
, src
, &cycle
) )
/* The comparison below tests whether this operand reads the same register
   and channel as src0. */
2232 (sel
== src_sel
[0]) &&
2233 (chan
== src_chan
[0]) )
2238 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
2244 else if( is_const(sel
) )
2250 if( GL_FALSE
== reserve_cfile(pAsm
, sel
, chan
) )
/*
 * assemble_alu_instruction: emit the hardware ALU instruction(s) for the
 * operation currently described by pAsm->D, pAsm->D2 and pAsm->S[].
 *
 * A "math" destination (D.dst.math == 1) produces a single scalar
 * instruction; anything else produces four, one per channel.  Each
 * instruction is allocated, filled in from the source and destination
 * descriptors, appended with add_alu_instruction() and then run through
 * check_scalar() or check_vector().
 */
2261 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
)
2263 R700ALUInstruction
* alu_instruction_ptr
;
2264 R700ALUInstructionHalfLiteral
* alu_instruction_ptr_hl
;
2265 R700ALUInstructionFullLiteral
* alu_instruction_ptr_fl
;
2267 GLuint number_of_scalar_operations
;
2268 GLboolean is_single_scalar_operation
;
2269 GLuint scalar_channel_index
;
2271 PVSSRC
* pcurrent_source
;
2272 int current_source_index
;
2273 GLuint contiguous_slots_needed
;
2275 GLuint uNumSrc
= r700GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2276 //GLuint channel_swizzle, j;
2277 //GLuint chan_counter[4] = {0, 0, 0, 0};
2278 //PVSSRC * pSource[3];
2279 GLboolean bSplitInst
= GL_FALSE
;
/* Math destinations are a single scalar operation, everything else is
   expanded into four per-channel operations. */
2281 if (1 == pAsm
->D
.dst
.math
)
2283 is_single_scalar_operation
= GL_TRUE
;
2284 number_of_scalar_operations
= 1;
2288 is_single_scalar_operation
= GL_FALSE
;
2289 number_of_scalar_operations
= 4;
/* NOTE(review): j, pSource, channel_swizzle and chan_counter are only
   declared in the commented-out lines above, so the read-port counting
   section below is presumably compiled out (e.g. by #if 0) - confirm. */
2291 /* current assembler doesn't do more than 1 register per source */
2293 /* check read port, only very preliminary algorithm, not count in
2294 src0/1 same comp case and prev slot repeat case; also not count relative
2295 addressing. TODO: improve performance. */
2296 for(j
=0; j
<uNumSrc
; j
++)
2298 pSource
[j
] = &(pAsm
->S
[j
].src
);
2300 for(scalar_channel_index
=0; scalar_channel_index
<4; scalar_channel_index
++)
2302 for(j
=0; j
<uNumSrc
; j
++)
2304 switch (scalar_channel_index
)
2306 case 0: channel_swizzle
= pSource
[j
]->swizzlex
; break;
2307 case 1: channel_swizzle
= pSource
[j
]->swizzley
; break;
2308 case 2: channel_swizzle
= pSource
[j
]->swizzlez
; break;
2309 case 3: channel_swizzle
= pSource
[j
]->swizzlew
; break;
2310 default: channel_swizzle
= SQ_SEL_MASK
; break;
2312 if ( ((pSource
[j
]->rtype
== SRC_REG_TEMPORARY
) ||
2313 (pSource
[j
]->rtype
== SRC_REG_INPUT
))
2314 && (channel_swizzle
<= SQ_SEL_W
) )
2316 chan_counter
[channel_swizzle
]++;
2320 if( (chan_counter
[SQ_SEL_X
] > 3)
2321 || (chan_counter
[SQ_SEL_Y
] > 3)
2322 || (chan_counter
[SQ_SEL_Z
] > 3)
2323 || (chan_counter
[SQ_SEL_W
] > 3) ) /* each chan bank has only 3 ports. */
2325 bSplitInst
= GL_TRUE
;
/* Slots that must fit into one clause: four for a vector operation, plus
   the slots occupied by literal constants. */
2330 contiguous_slots_needed
= 0;
2332 if(!is_single_scalar_operation
)
2334 contiguous_slots_needed
= 4;
2337 contiguous_slots_needed
+= pAsm
->D2
.dst2
.literal_slots
;
/* One hardware instruction per scalar operation. */
2341 for (scalar_channel_index
=0;
2342 scalar_channel_index
< number_of_scalar_operations
;
2343 scalar_channel_index
++)
/* The final operation carries the literal constants (pAsm->C[]) when
   literal_slots asks for them: a half-literal instruction takes C[0..1], a
   full-literal one C[0..3].  All other operations are plain instructions. */
2345 if(scalar_channel_index
== (number_of_scalar_operations
-1))
2347 switch(pAsm
->D2
.dst2
.literal_slots
)
2350 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2351 Init_R700ALUInstruction(alu_instruction_ptr
);
2354 alu_instruction_ptr_hl
= (R700ALUInstructionHalfLiteral
*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral
);
2355 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl
, pAsm
->C
[0].f
, pAsm
->C
[1].f
);
2356 alu_instruction_ptr
= (R700ALUInstruction
*)alu_instruction_ptr_hl
;
2359 alu_instruction_ptr_fl
= (R700ALUInstructionFullLiteral
*) CALLOC_STRUCT(R700ALUInstructionFullLiteral
);
2360 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl
,pAsm
->C
[0].f
, pAsm
->C
[1].f
, pAsm
->C
[2].f
, pAsm
->C
[3].f
);
2361 alu_instruction_ptr
= (R700ALUInstruction
*)alu_instruction_ptr_fl
;
2367 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2368 Init_R700ALUInstruction(alu_instruction_ptr
);
/* Encode source operand 0. */
2372 current_source_index
= 0;
2373 pcurrent_source
= &(pAsm
->S
[0].src
);
2375 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2376 current_source_index
,
2378 scalar_channel_index
) )
/* Encode source operand 1. */
2386 current_source_index
= 1;
2387 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2389 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2390 current_source_index
,
2392 scalar_channel_index
) )
2399 alu_instruction_ptr
->m_Word0
.f
.index_mode
= pAsm
->D2
.dst2
.index_mode
;
/* Scalar operations and split instructions each end their own group; for a
   vector operation only the channel with index 3 is marked last. */
2401 if( (is_single_scalar_operation
== GL_TRUE
)
2402 || (GL_TRUE
== bSplitInst
) )
2404 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
2408 alu_instruction_ptr
->m_Word0
.f
.last
= (scalar_channel_index
== 3) ? 1 : 0;
/* Predicate handling: pred_sel follows pred_inv; predicated destinations
   update both the predicate and the execute mask. */
2411 alu_instruction_ptr
->m_Word0
.f
.pred_sel
= (pAsm
->D
.dst
.pred_inv
> 0) ? 1 : 0;
2412 if(1 == pAsm
->D
.dst
.predicated
)
2414 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x1;
2415 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x1;
2419 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x0;
2420 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x0;
/* Destination register: only temporaries and outputs are accepted. */
2424 if( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
2425 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
2427 alu_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
2431 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2435 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
; //D.rtype
/* For a scalar operation the destination channel is the first enabled
   write channel (x, then y, z, w). */
2437 if ( is_single_scalar_operation
== GL_TRUE
)
2439 // Override scalar_channel_index since only one scalar value will be written
2440 if(pAsm
->D
.dst
.writex
)
2442 scalar_channel_index
= 0;
2444 else if(pAsm
->D
.dst
.writey
)
2446 scalar_channel_index
= 1;
2448 else if(pAsm
->D
.dst
.writez
)
2450 scalar_channel_index
= 2;
2452 else if(pAsm
->D
.dst
.writew
)
2454 scalar_channel_index
= 3;
2458 alu_instruction_ptr
->m_Word1
.f
.dst_chan
= scalar_channel_index
;
2460 alu_instruction_ptr
->m_Word1
.f
.clamp
= pAsm
->D2
.dst2
.SaturateMode
;
/* Three-operand opcodes use the OP3 layout of word 1 and need the third
   source encoded as well. */
2462 if (pAsm
->D
.dst
.op3
)
2466 alu_instruction_ptr
->m_Word1_OP3
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2468 //There's 3rd src for op3
2469 current_source_index
= 2;
2470 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2472 if ( GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2473 current_source_index
,
2475 scalar_channel_index
) )
/* Two-operand opcodes: the same fields are written through the f6 view
   here and through the f view further below.
   NOTE(review): the condition choosing between the two views is not
   visible - presumably it depends on the chip generation; confirm. */
2485 alu_instruction_ptr
->m_Word1_OP2
.f6
.alu_inst
= pAsm
->D
.dst
.opcode
;
2487 alu_instruction_ptr
->m_Word1_OP2
.f6
.src0_abs
= pAsm
->S
[0].src
.abs
;
2488 alu_instruction_ptr
->m_Word1_OP2
.f6
.src1_abs
= pAsm
->S
[1].src
.abs
;
2490 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2491 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2492 switch (scalar_channel_index
)
2495 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writex
;
2498 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writey
;
2501 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writez
;
2504 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writew
;
2507 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= 1; //SQ_SEL_MASK;
2510 alu_instruction_ptr
->m_Word1_OP2
.f6
.omod
= SQ_ALU_OMOD_OFF
;
2514 alu_instruction_ptr
->m_Word1_OP2
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
2516 alu_instruction_ptr
->m_Word1_OP2
.f
.src0_abs
= pAsm
->S
[0].src
.abs
;
2517 alu_instruction_ptr
->m_Word1_OP2
.f
.src1_abs
= pAsm
->S
[1].src
.abs
;
2519 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2520 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2521 switch (scalar_channel_index
)
2524 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writex
;
2527 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writey
;
2530 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writez
;
2533 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writew
;
2536 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= 1; //SQ_SEL_MASK;
2539 alu_instruction_ptr
->m_Word1_OP2
.f
.omod
= SQ_ALU_OMOD_OFF
;
/* Append the finished instruction to the current ALU clause. */
2543 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, contiguous_slots_needed
) )
2549 * Judge the type of current instruction, is it vector or scalar
2552 if (is_single_scalar_operation
)
2554 if(GL_FALSE
== check_scalar(pAsm
, alu_instruction_ptr
) )
2561 if(GL_FALSE
== check_vector(pAsm
, alu_instruction_ptr
) )
2567 contiguous_slots_needed
-= 1;
/*
 * next_ins: emit the instruction currently described by the assembler state
 * and reset the per-instruction state for the next one.
 *
 * Texture instructions go through assemble_tex_instruction() (with
 * unnormalized coordinates for TEXTURE_RECT_INDEX targets), everything else
 * through assemble_alu_instruction().  Writes to output registers are
 * recorded in pucOutMask.
 */
2573 GLboolean
next_ins(r700_AssemblerBase
*pAsm
)
2575 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
2577 if( GL_TRUE
== pAsm
->is_tex
)
/* Rectangle textures are sampled with unnormalized coordinates
   (second argument GL_FALSE), all other targets with normalized ones. */
2579 if (pILInst
->TexSrcTarget
== TEXTURE_RECT_INDEX
) {
2580 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_FALSE
) )
2582 radeon_error("Error assembling TEX instruction\n");
2586 if( GL_FALSE
== assemble_tex_instruction(pAsm
, GL_TRUE
) )
2588 radeon_error("Error assembling TEX instruction\n");
2595 if( GL_FALSE
== assemble_alu_instruction(pAsm
) )
2597 radeon_error("Error assembling ALU instruction\n");
/* Track which channels of each export register have been written; the
   mask is indexed relative to starting_export_register_number. */
2602 if(pAsm
->D
.dst
.rtype
== DST_REG_OUT
)
2606 // There is no mask for OP3 instructions, so all channels are written
2607 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
] = 0xF;
2611 pAsm
->pucOutMask
[pAsm
->D
.dst
.reg
- pAsm
->starting_export_register_number
]
2612 |= (unsigned char)pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
;
2616 //reset for next inst.
2619 pAsm
->S
[0].bits
= 0;
2620 pAsm
->S
[1].bits
= 0;
2621 pAsm
->S
[2].bits
= 0;
2622 pAsm
->is_tex
= GL_FALSE
;
2623 pAsm
->need_tex_barrier
= GL_FALSE
;
/* Clear the four literal constant slots as well. */
2625 pAsm
->C
[0].bits
= pAsm
->C
[1].bits
= pAsm
->C
[2].bits
= pAsm
->C
[3].bits
= 0;
/*
 * assemble_math_function: emit a scalar math opcode followed by a MOV that
 * replicates the result into the real destination.
 *
 *   opcode  hardware opcode of the scalar math operation
 *
 * First instruction:  opcode tmp.x, a.x   (tmp is a helper register from
 *                     gethelpr(), source 0 comes from assemble_src()).
 * Second instruction: MOV dst, tmp.x      (dst comes from assemble_dst(),
 *                     the source swizzle is forced to X with no negation).
 *
 * The MOV's source register type is now set with SRC_REG_TEMPORARY instead
 * of DST_REG_TEMPORARY: the field belongs to a source operand, and
 * assemble_CMP uses SRC_REG_TEMPORARY for the same purpose.
 * NOTE(review): presumably both constants have the same value, making this
 * a readability/consistency fix - confirm against the enum definitions.
 */
2629 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
)
2635 tmp
= gethelpr(pAsm
);
2637 // opcode tmp.x, a.x
2640 pAsm
->D
.dst
.opcode
= opcode
;
2641 pAsm
->D
.dst
.math
= 1;
2643 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2644 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2645 pAsm
->D
.dst
.reg
= tmp
;
2646 pAsm
->D
.dst
.writex
= 1;
2648 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2653 if ( GL_FALSE
== next_ins(pAsm
) )
2658 // Now replicate result to all necessary channels in destination
2659 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2661 if( GL_FALSE
== assemble_dst(pAsm
) )
/* Source of the MOV is the helper register written above. */
2666 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2667 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2668 pAsm
->S
[0].src
.reg
= tmp
;
2670 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
2671 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2673 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_ABS: emit the absolute value as MAX(a, -a).
 *
 * Source 1 is a bit-for-bit copy of source 0 with its negate flags flipped,
 * so the single MAX instruction yields |a|.
 */
2681 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
)
2685 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
2687 if( GL_FALSE
== assemble_dst(pAsm
) )
2691 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
/* Second operand: same register and swizzle, opposite sign. */
2696 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
2697 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
2699 if ( GL_FALSE
== next_ins(pAsm
) )
2707 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
)
2709 if( GL_FALSE
== checkop2(pAsm
) )
2714 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
2716 if( GL_FALSE
== assemble_dst(pAsm
) )
2721 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2726 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2731 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_SUB
)
2733 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
2736 if( GL_FALSE
== next_ins(pAsm
) )
2744 GLboolean
assemble_ARL(r700_AssemblerBase
*pAsm
)
2745 { /* TODO: ar values dont' persist between clauses */
2746 if( GL_FALSE
== checkop1(pAsm
) )
2751 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOVA_FLOOR
;
2752 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2753 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2754 pAsm
->D
.dst
.reg
= 0;
2755 pAsm
->D
.dst
.writex
= 0;
2756 pAsm
->D
.dst
.writey
= 0;
2757 pAsm
->D
.dst
.writez
= 0;
2758 pAsm
->D
.dst
.writew
= 0;
2760 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2765 if( GL_FALSE
== next_ins(pAsm
) )
2773 GLboolean
assemble_BAD(char *opcode_str
)
2775 radeon_error("Not yet implemented instruction (%s)\n", opcode_str
);
2779 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
)
2783 if( GL_FALSE
== checkop3(pAsm
) )
2788 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_CNDGE
;
2789 pAsm
->D
.dst
.op3
= 1;
2793 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
2795 //OP3 has no support for write mask
2796 tmp
= gethelpr(pAsm
);
2798 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2799 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2800 pAsm
->D
.dst
.reg
= tmp
;
2802 nomask_PVSDST(&(pAsm
->D
.dst
));
2806 if( GL_FALSE
== assemble_dst(pAsm
) )
2812 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2817 if( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
2822 if( GL_FALSE
== assemble_src(pAsm
, 1, 2) )
2827 if ( GL_FALSE
== next_ins(pAsm
) )
2832 if (0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
2834 if( GL_FALSE
== assemble_dst(pAsm
) )
2839 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
2842 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2843 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2844 pAsm
->S
[0].src
.reg
= tmp
;
2846 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2847 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
2849 if( GL_FALSE
== next_ins(pAsm
) )
2858 GLboolean
assemble_TRIG(r700_AssemblerBase
*pAsm
, BITS opcode
)
2863 tmp
= gethelpr(pAsm
);
2865 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
2866 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2867 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2868 pAsm
->D
.dst
.reg
= tmp
;
2869 pAsm
->D
.dst
.writex
= 1;
2871 assemble_src(pAsm
, 0, -1);
2873 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
2874 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
2875 pAsm
->D2
.dst2
.literal_slots
= 1;
2876 pAsm
->C
[0].f
= 1/(3.1415926535 * 2);
2877 pAsm
->C
[1].f
= 0.0F
;
2880 pAsm
->D
.dst
.opcode
= opcode
;
2881 pAsm
->D
.dst
.math
= 1;
2885 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
2886 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
2887 pAsm
->S
[0].src
.reg
= tmp
;
2888 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
2889 noneg_PVSSRC(&(pAsm
->S
[0].src
));
2893 //TODO - replicate if more channels set in WriteMask
2898 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
)
2900 if( GL_FALSE
== checkop2(pAsm
) )
2905 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_DOT4
;
2907 if( GL_FALSE
== assemble_dst(pAsm
) )
2912 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2917 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2922 if(OPCODE_DP3
== pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
2924 zerocomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2925 zerocomp_PVSSRC(&(pAsm
->S
[1].src
), 3);
2927 else if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_DPH
)
2929 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2932 if ( GL_FALSE
== next_ins(pAsm
) )
2940 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
)
2942 if( GL_FALSE
== checkop2(pAsm
) )
2947 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
2949 if( GL_FALSE
== assemble_dst(pAsm
) )
2954 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
2959 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
2964 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 0);
2965 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
2967 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 0);
2968 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 2);
2970 if ( GL_FALSE
== next_ins(pAsm
) )
2978 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
)
2980 return assemble_math_function(pAsm
, SQ_OP2_INST_EXP_IEEE
);
2983 GLboolean
assemble_EXP(r700_AssemblerBase
*pAsm
)
2989 tmp
= gethelpr(pAsm
);
2994 if (pAsm
->pILInst
->DstReg
.WriteMask
& 0x1) {
2995 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
2997 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
2998 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
2999 pAsm
->D
.dst
.reg
= tmp
;
3000 pAsm
->D
.dst
.writex
= 1;
3002 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3007 if( GL_FALSE
== next_ins(pAsm
) )
3012 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3013 pAsm
->D
.dst
.math
= 1;
3015 if( GL_FALSE
== assemble_dst(pAsm
) )
3020 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3022 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3023 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3024 pAsm
->S
[0].src
.reg
= tmp
;
3026 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3027 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3029 if( GL_FALSE
== next_ins(pAsm
) )
3037 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 1) & 0x1) {
3038 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
3040 if( GL_FALSE
== assemble_dst(pAsm
) )
3045 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3050 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3052 if( GL_FALSE
== next_ins(pAsm
) )
3060 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 2) & 0x1) {
3061 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3062 pAsm
->D
.dst
.math
= 1;
3064 if( GL_FALSE
== assemble_dst(pAsm
) )
3069 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3074 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
3076 if( GL_FALSE
== next_ins(pAsm
) )
3084 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 3) & 0x1) {
3085 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3087 if( GL_FALSE
== assemble_dst(pAsm
) )
3092 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
3094 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3095 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3096 pAsm
->S
[0].src
.reg
= tmp
;
3098 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
3099 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3101 if( GL_FALSE
== next_ins(pAsm
) )
3110 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
)
3114 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3116 if ( GL_FALSE
== assemble_dst(pAsm
) )
3121 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3126 if ( GL_FALSE
== next_ins(pAsm
) )
3134 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
)
3136 return assemble_math_function(pAsm
, SQ_OP2_INST_FLT_TO_INT
);
3139 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
)
3143 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
3145 if ( GL_FALSE
== assemble_dst(pAsm
) )
3150 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3155 if ( GL_FALSE
== next_ins(pAsm
) )
3163 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
, GLuint opcode
)
3165 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
3167 if(pILInst
->Opcode
== OPCODE_KIL
)
3170 pAsm
->D
.dst
.opcode
= opcode
;
3171 //pAsm->D.dst.math = 1;
3173 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3174 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3175 pAsm
->D
.dst
.reg
= 0;
3176 pAsm
->D
.dst
.writex
= 0;
3177 pAsm
->D
.dst
.writey
= 0;
3178 pAsm
->D
.dst
.writez
= 0;
3179 pAsm
->D
.dst
.writew
= 0;
3181 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3182 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3183 pAsm
->S
[0].src
.reg
= 0;
3184 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_0
);
3185 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3187 if(pILInst
->Opcode
== OPCODE_KIL_NV
)
3189 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3190 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3191 pAsm
->S
[1].src
.reg
= 0;
3192 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_1
);
3193 neg_PVSSRC(&(pAsm
->S
[1].src
));
3197 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3204 if ( GL_FALSE
== next_ins(pAsm
) )
3209 /* Doc says KILL has to be last(end) ALU clause */
3210 pAsm
->pR700Shader
->killIsUsed
= GL_TRUE
;
3211 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
3216 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
)
3218 return assemble_math_function(pAsm
, SQ_OP2_INST_LOG_IEEE
);
3221 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
)
3225 if( GL_FALSE
== checkop3(pAsm
) )
3230 tmp
= gethelpr(pAsm
);
3232 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3234 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3235 pAsm
->D
.dst
.reg
= tmp
;
3236 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3237 nomask_PVSDST(&(pAsm
->D
.dst
));
3240 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
3245 if ( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
3250 neg_PVSSRC(&(pAsm
->S
[1].src
));
3252 if( GL_FALSE
== next_ins(pAsm
) )
3257 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3258 pAsm
->D
.dst
.op3
= 1;
3260 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3261 pAsm
->D
.dst
.reg
= tmp
;
3262 nomask_PVSDST(&(pAsm
->D
.dst
));
3263 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3265 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3266 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3267 pAsm
->S
[0].src
.reg
= tmp
;
3268 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3271 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3276 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
3281 if( GL_FALSE
== next_ins(pAsm
) )
3286 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3288 if( GL_FALSE
== assemble_dst(pAsm
) )
3293 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3294 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3295 pAsm
->S
[0].src
.reg
= tmp
;
3296 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3298 if( GL_FALSE
== next_ins(pAsm
) )
3306 GLboolean
assemble_LOG(r700_AssemblerBase
*pAsm
)
3308 BITS tmp1
, tmp2
, tmp3
;
3312 tmp1
= gethelpr(pAsm
);
3313 tmp2
= gethelpr(pAsm
);
3314 tmp3
= gethelpr(pAsm
);
3316 // FIXME: The hardware can do fabs() directly on input
3317 // elements, but the compiler doesn't have the
3318 // capability to use that.
3320 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3322 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3324 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3325 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3326 pAsm
->D
.dst
.reg
= tmp1
;
3327 pAsm
->D
.dst
.writex
= 1;
3329 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3334 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
3335 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3337 if ( GL_FALSE
== next_ins(pAsm
) )
3344 // LG2 tmp2.x, tmp1.x
3345 // FLOOR tmp3.x, tmp2.x
3346 // MOV dst.x, tmp3.x
3347 // ADD tmp3.x, tmp2.x, -tmp3.x
3348 // EX2 dst.y, tmp3.x
3349 // MOV dst.z, tmp2.x
3352 // LG2 tmp2.x, tmp1.x
3353 // FLOOR tmp3.x, tmp2.x
3355 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
3356 pAsm
->D
.dst
.math
= 1;
3358 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3359 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3360 pAsm
->D
.dst
.reg
= tmp2
;
3361 pAsm
->D
.dst
.writex
= 1;
3363 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3364 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3365 pAsm
->S
[0].src
.reg
= tmp1
;
3367 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3368 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3370 if( GL_FALSE
== next_ins(pAsm
) )
3375 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3377 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3378 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3379 pAsm
->D
.dst
.reg
= tmp3
;
3380 pAsm
->D
.dst
.writex
= 1;
3382 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3383 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3384 pAsm
->S
[0].src
.reg
= tmp2
;
3386 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3387 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3389 if( GL_FALSE
== next_ins(pAsm
) )
3394 // MOV dst.x, tmp3.x
3396 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3398 if( GL_FALSE
== assemble_dst(pAsm
) )
3403 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3405 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3406 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3407 pAsm
->S
[0].src
.reg
= tmp3
;
3409 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3410 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3412 if( GL_FALSE
== next_ins(pAsm
) )
3417 // ADD tmp3.x, tmp2.x, -tmp3.x
3418 // EX2 dst.y, tmp3.x
3420 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3422 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3423 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3424 pAsm
->D
.dst
.reg
= tmp3
;
3425 pAsm
->D
.dst
.writex
= 1;
3427 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3428 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3429 pAsm
->S
[0].src
.reg
= tmp2
;
3431 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3432 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3434 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3435 pAsm
->S
[1].src
.rtype
= DST_REG_TEMPORARY
;
3436 pAsm
->S
[1].src
.reg
= tmp3
;
3438 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
3439 neg_PVSSRC(&(pAsm
->S
[1].src
));
3441 if( GL_FALSE
== next_ins(pAsm
) )
3446 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3447 pAsm
->D
.dst
.math
= 1;
3449 if( GL_FALSE
== assemble_dst(pAsm
) )
3454 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3456 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3457 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3458 pAsm
->S
[0].src
.reg
= tmp3
;
3460 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3461 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3463 if( GL_FALSE
== next_ins(pAsm
) )
3468 // MOV dst.z, tmp2.x
3470 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3472 if( GL_FALSE
== assemble_dst(pAsm
) )
3477 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
3479 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3480 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3481 pAsm
->S
[0].src
.reg
= tmp2
;
3483 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3484 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3486 if( GL_FALSE
== next_ins(pAsm
) )
3493 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3495 if( GL_FALSE
== assemble_dst(pAsm
) )
3500 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
3502 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3503 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3504 pAsm
->S
[0].src
.reg
= tmp1
;
3506 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
3507 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3509 if( GL_FALSE
== next_ins(pAsm
) )
3517 GLboolean
assemble_MAD(struct r700_AssemblerBase
*pAsm
)
3520 GLboolean bReplaceDst
= GL_FALSE
;
3521 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
3523 if( GL_FALSE
== checkop3(pAsm
) )
3528 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3529 pAsm
->D
.dst
.op3
= 1;
3533 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
3534 { /* TODO : more investigation on MAD src and dst using same register */
3535 for(ii
=0; ii
<3; ii
++)
3537 if( (PROGRAM_TEMPORARY
== pILInst
->SrcReg
[ii
].File
)
3538 && (pILInst
->DstReg
.Index
== pILInst
->SrcReg
[ii
].Index
) )
3540 bReplaceDst
= GL_TRUE
;
3545 if(0xF != pILInst
->DstReg
.WriteMask
)
3546 { /* OP3 has no support for write mask */
3547 bReplaceDst
= GL_TRUE
;
3550 if(GL_TRUE
== bReplaceDst
)
3552 tmp
= gethelpr(pAsm
);
3554 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3555 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3556 pAsm
->D
.dst
.reg
= tmp
;
3558 nomask_PVSDST(&(pAsm
->D
.dst
));
3562 if( GL_FALSE
== assemble_dst(pAsm
) )
3568 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3573 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3578 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
3583 if ( GL_FALSE
== next_ins(pAsm
) )
3588 if (GL_TRUE
== bReplaceDst
)
3590 if( GL_FALSE
== assemble_dst(pAsm
) )
3595 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3598 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3599 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3600 pAsm
->S
[0].src
.reg
= tmp
;
3602 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3603 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3605 if( GL_FALSE
== next_ins(pAsm
) )
3615 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
)
3617 unsigned int dstReg
;
3618 unsigned int dstType
;
3619 unsigned int srcReg
;
3620 unsigned int srcType
;
3622 int tmp
= gethelpr(pAsm
);
3624 if( GL_FALSE
== assemble_dst(pAsm
) )
3628 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3632 dstReg
= pAsm
->D
.dst
.reg
;
3633 dstType
= pAsm
->D
.dst
.rtype
;
3634 srcReg
= pAsm
->S
[0].src
.reg
;
3635 srcType
= pAsm
->S
[0].src
.rtype
;
3637 /* dst.xw, <- 1.0 */
3638 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3639 pAsm
->D
.dst
.rtype
= dstType
;
3640 pAsm
->D
.dst
.reg
= dstReg
;
3641 pAsm
->D
.dst
.writex
= 1;
3642 pAsm
->D
.dst
.writey
= 0;
3643 pAsm
->D
.dst
.writez
= 0;
3644 pAsm
->D
.dst
.writew
= 1;
3645 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3646 pAsm
->S
[0].src
.reg
= tmp
;
3647 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3648 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3649 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_1
;
3650 pAsm
->S
[0].src
.swizzley
= SQ_SEL_1
;
3651 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_1
;
3652 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_1
;
3653 if( GL_FALSE
== next_ins(pAsm
) )
3658 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3663 /* dst.y = max(src.x, 0.0) */
3664 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3665 pAsm
->D
.dst
.rtype
= dstType
;
3666 pAsm
->D
.dst
.reg
= dstReg
;
3667 pAsm
->D
.dst
.writex
= 0;
3668 pAsm
->D
.dst
.writey
= 1;
3669 pAsm
->D
.dst
.writez
= 0;
3670 pAsm
->D
.dst
.writew
= 0;
3671 pAsm
->S
[0].src
.rtype
= srcType
;
3672 pAsm
->S
[0].src
.reg
= srcReg
;
3673 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3674 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
3675 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3676 pAsm
->S
[1].src
.reg
= tmp
;
3677 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3678 noneg_PVSSRC(&(pAsm
->S
[1].src
));
3679 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
3680 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
3681 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
3682 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
3683 if( GL_FALSE
== next_ins(pAsm
) )
3688 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3693 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
);
3695 /* dst.z = log(src.y) */
3696 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_CLAMPED
;
3697 pAsm
->D
.dst
.math
= 1;
3698 pAsm
->D
.dst
.rtype
= dstType
;
3699 pAsm
->D
.dst
.reg
= dstReg
;
3700 pAsm
->D
.dst
.writex
= 0;
3701 pAsm
->D
.dst
.writey
= 0;
3702 pAsm
->D
.dst
.writez
= 1;
3703 pAsm
->D
.dst
.writew
= 0;
3704 pAsm
->S
[0].src
.rtype
= srcType
;
3705 pAsm
->S
[0].src
.reg
= srcReg
;
3706 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3707 if( GL_FALSE
== next_ins(pAsm
) )
3712 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3717 if( GL_FALSE
== assemble_src(pAsm
, 0, 2) )
3722 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
3724 swizzleagain_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
3726 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
3727 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MUL_LIT
;
3728 pAsm
->D
.dst
.math
= 1;
3729 pAsm
->D
.dst
.op3
= 1;
3730 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3731 pAsm
->D
.dst
.reg
= tmp
;
3732 pAsm
->D
.dst
.writex
= 1;
3733 pAsm
->D
.dst
.writey
= 0;
3734 pAsm
->D
.dst
.writez
= 0;
3735 pAsm
->D
.dst
.writew
= 0;
3737 pAsm
->S
[0].src
.rtype
= srcType
;
3738 pAsm
->S
[0].src
.reg
= srcReg
;
3739 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3741 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3742 pAsm
->S
[1].src
.reg
= dstReg
;
3743 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3744 noneg_PVSSRC(&(pAsm
->S
[1].src
));
3745 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_Z
;
3746 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Z
;
3747 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
3748 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_Z
;
3750 pAsm
->S
[2].src
.rtype
= srcType
;
3751 pAsm
->S
[2].src
.reg
= srcReg
;
3752 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
3754 if( GL_FALSE
== next_ins(pAsm
) )
3759 /* dst.z = exp(tmp.x) */
3760 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3761 pAsm
->D
.dst
.math
= 1;
3762 pAsm
->D
.dst
.rtype
= dstType
;
3763 pAsm
->D
.dst
.reg
= dstReg
;
3764 pAsm
->D
.dst
.writex
= 0;
3765 pAsm
->D
.dst
.writey
= 0;
3766 pAsm
->D
.dst
.writez
= 1;
3767 pAsm
->D
.dst
.writew
= 0;
3769 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3770 pAsm
->S
[0].src
.reg
= tmp
;
3771 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3772 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3773 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
3774 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
3775 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_X
;
3776 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_X
;
3778 if( GL_FALSE
== next_ins(pAsm
) )
3786 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
)
3788 if( GL_FALSE
== checkop2(pAsm
) )
3793 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3795 if( GL_FALSE
== assemble_dst(pAsm
) )
3800 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3805 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3810 if( GL_FALSE
== next_ins(pAsm
) )
3818 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
)
3820 if( GL_FALSE
== checkop2(pAsm
) )
3825 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MIN
;
3827 if( GL_FALSE
== assemble_dst(pAsm
) )
3832 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3837 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3842 if( GL_FALSE
== next_ins(pAsm
) )
3850 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
)
3854 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3856 if (GL_FALSE
== assemble_dst(pAsm
))
3861 if (GL_FALSE
== assemble_src(pAsm
, 0, -1))
3866 if ( GL_FALSE
== next_ins(pAsm
) )
3874 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
)
3876 if( GL_FALSE
== checkop2(pAsm
) )
3881 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3883 if( GL_FALSE
== assemble_dst(pAsm
) )
3888 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3893 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3898 if( GL_FALSE
== next_ins(pAsm
) )
3906 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
)
3912 tmp
= gethelpr(pAsm
);
3914 // LG2 tmp.x, a.swizzle
3915 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
3916 pAsm
->D
.dst
.math
= 1;
3918 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3919 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3920 pAsm
->D
.dst
.reg
= tmp
;
3921 nomask_PVSDST(&(pAsm
->D
.dst
));
3923 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3928 if( GL_FALSE
== next_ins(pAsm
) )
3933 // MUL tmp.x, tmp.x, b.swizzle
3934 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3936 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3937 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3938 pAsm
->D
.dst
.reg
= tmp
;
3939 nomask_PVSDST(&(pAsm
->D
.dst
));
3941 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3942 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3943 pAsm
->S
[0].src
.reg
= tmp
;
3944 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3945 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3947 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3952 if( GL_FALSE
== next_ins(pAsm
) )
3957 // EX2 dst.mask, tmp.x
3959 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3960 pAsm
->D
.dst
.math
= 1;
3962 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3963 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3964 pAsm
->D
.dst
.reg
= tmp
;
3965 nomask_PVSDST(&(pAsm
->D
.dst
));
3967 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3968 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3969 pAsm
->S
[0].src
.reg
= tmp
;
3970 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3971 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3973 if( GL_FALSE
== next_ins(pAsm
) )
3978 // Now replicate result to all necessary channels in destination
3979 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3981 if( GL_FALSE
== assemble_dst(pAsm
) )
3986 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3987 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3988 pAsm
->S
[0].src
.reg
= tmp
;
3990 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3991 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3993 if( GL_FALSE
== next_ins(pAsm
) )
4001 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
)
4003 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIP_IEEE
);
4006 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
)
4008 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIPSQRT_IEEE
);
4011 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
)
4017 tmp
= gethelpr(pAsm
);
4018 /* tmp.x = src /2*PI */
4019 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4020 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4021 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4022 pAsm
->D
.dst
.reg
= tmp
;
4023 pAsm
->D
.dst
.writex
= 1;
4025 assemble_src(pAsm
, 0, -1);
4027 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
4028 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
4029 pAsm
->D2
.dst2
.literal_slots
= 1;
4030 pAsm
->C
[0].f
= 1/(3.1415926535 * 2);
4031 pAsm
->C
[1].f
= 0.0F
;
4036 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_COS
;
4037 pAsm
->D
.dst
.math
= 1;
4041 pAsm
->D
.dst
.writey
= 0;
4043 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4044 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4045 pAsm
->S
[0].src
.reg
= tmp
;
4046 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4047 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4049 if ( GL_FALSE
== next_ins(pAsm
) )
4055 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SIN
;
4056 pAsm
->D
.dst
.math
= 1;
4060 pAsm
->D
.dst
.writex
= 0;
4062 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4063 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4064 pAsm
->S
[0].src
.reg
= tmp
;
4065 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4066 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4068 if( GL_FALSE
== next_ins(pAsm
) )
4076 GLboolean
assemble_LOGIC(r700_AssemblerBase
*pAsm
, BITS opcode
)
4078 if( GL_FALSE
== checkop2(pAsm
) )
4083 pAsm
->D
.dst
.opcode
= opcode
;
4084 //pAsm->D.dst.math = 1;
4086 if( GL_FALSE
== assemble_dst(pAsm
) )
4091 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4096 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4101 if( GL_FALSE
== next_ins(pAsm
) )
4109 GLboolean
assemble_LOGIC_PRED(r700_AssemblerBase
*pAsm
, BITS opcode
)
4111 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
4113 pAsm
->D
.dst
.opcode
= opcode
;
4114 pAsm
->D
.dst
.math
= 1;
4115 pAsm
->D
.dst
.predicated
= 1;
4117 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4118 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4119 pAsm
->D
.dst
.reg
= pAsm
->uHelpReg
;
4120 pAsm
->D
.dst
.writex
= 1;
4121 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
4123 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4124 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4125 pAsm
->S
[0].src
.reg
= pAsm
->last_cond_register
+ pAsm
->starting_temp_register_number
;
4126 pAsm
->S
[0].src
.swizzlex
= pILInst
->DstReg
.CondSwizzle
& 0x7;
4127 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4129 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4130 pAsm
->S
[1].src
.reg
= pAsm
->uHelpReg
;
4131 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4132 noneg_PVSSRC(&(pAsm
->S
[1].src
));
4133 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
4134 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
4135 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
4136 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
4138 if( GL_FALSE
== next_ins(pAsm
) )
4146 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
)
4148 if( GL_FALSE
== checkop2(pAsm
) )
4153 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGE
;
4155 if( GL_FALSE
== assemble_dst(pAsm
) )
4160 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4165 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4170 if( GL_FALSE
== next_ins(pAsm
) )
4178 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
)
4180 if( GL_FALSE
== checkop2(pAsm
) )
4185 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGT
;
4187 if( GL_FALSE
== assemble_dst(pAsm
) )
4192 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
4197 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
4202 if( GL_FALSE
== next_ins(pAsm
) )
4210 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
)
4215 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
)
4217 GLboolean src_const
;
4218 GLboolean need_barrier
= GL_FALSE
;
4222 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].SrcReg
[0].File
)
4224 case PROGRAM_UNIFORM
:
4225 case PROGRAM_CONSTANT
:
4226 case PROGRAM_LOCAL_PARAM
:
4227 case PROGRAM_ENV_PARAM
:
4228 case PROGRAM_STATE_VAR
:
4229 src_const
= GL_TRUE
;
4231 case PROGRAM_TEMPORARY
:
4234 src_const
= GL_FALSE
;
4238 if (GL_TRUE
== src_const
)
4240 if ( GL_FALSE
== mov_temp(pAsm
, 0) )
4242 need_barrier
= GL_TRUE
;
4245 if (pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
4247 GLuint tmp
= gethelpr(pAsm
);
4248 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
4249 pAsm
->D
.dst
.math
= 1;
4250 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4251 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4252 pAsm
->D
.dst
.reg
= tmp
;
4253 pAsm
->D
.dst
.writew
= 1;
4255 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4259 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
4260 if( GL_FALSE
== next_ins(pAsm
) )
4265 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4266 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4267 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4268 pAsm
->D
.dst
.reg
= tmp
;
4269 pAsm
->D
.dst
.writex
= 1;
4270 pAsm
->D
.dst
.writey
= 1;
4271 pAsm
->D
.dst
.writez
= 1;
4272 pAsm
->D
.dst
.writew
= 0;
4274 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4278 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4279 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4280 pAsm
->S
[1].src
.reg
= tmp
;
4281 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_W
);
4283 if( GL_FALSE
== next_ins(pAsm
) )
4288 pAsm
->aArgSubst
[1] = tmp
;
4289 need_barrier
= GL_TRUE
;
4292 if (pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
4294 GLuint tmp1
= gethelpr(pAsm
);
4295 GLuint tmp2
= gethelpr(pAsm
);
4297 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
4298 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_CUBE
;
4299 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4300 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4301 pAsm
->D
.dst
.reg
= tmp1
;
4302 nomask_PVSDST(&(pAsm
->D
.dst
));
4304 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4309 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
4314 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
);
4315 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_X
, SQ_SEL_Z
, SQ_SEL_Z
);
4317 if( GL_FALSE
== next_ins(pAsm
) )
4322 /* tmp1.z = RCP_e(|tmp1.z|) */
4323 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
4324 pAsm
->D
.dst
.math
= 1;
4325 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4326 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4327 pAsm
->D
.dst
.reg
= tmp1
;
4328 pAsm
->D
.dst
.writez
= 1;
4330 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4331 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4332 pAsm
->S
[0].src
.reg
= tmp1
;
4333 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Z
;
4334 pAsm
->S
[0].src
.abs
= 1;
4338 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4339 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4340 * muladd has no writemask, have to use another temp
4342 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4343 pAsm
->D
.dst
.op3
= 1;
4344 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4345 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4346 pAsm
->D
.dst
.reg
= tmp2
;
4348 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4349 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4350 pAsm
->S
[0].src
.reg
= tmp1
;
4351 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4352 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4353 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4354 pAsm
->S
[1].src
.reg
= tmp1
;
4355 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
);
4356 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
4357 /* immediate c 1.5 */
4358 pAsm
->D2
.dst2
.literal_slots
= 1;
4359 pAsm
->C
[0].f
= 1.5F
;
4360 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
4361 pAsm
->S
[2].src
.reg
= tmp1
;
4362 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_X
);
4366 /* tmp1.xy = temp2.xy */
4367 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4368 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4369 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4370 pAsm
->D
.dst
.reg
= tmp1
;
4371 pAsm
->D
.dst
.writex
= 1;
4372 pAsm
->D
.dst
.writey
= 1;
4373 pAsm
->D
.dst
.writez
= 0;
4374 pAsm
->D
.dst
.writew
= 0;
4376 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4377 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4378 pAsm
->S
[0].src
.reg
= tmp2
;
4379 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4382 pAsm
->aArgSubst
[1] = tmp1
;
4383 need_barrier
= GL_TRUE
;
4387 switch(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
4390 /* will these need WQM(1) on CF inst ? */
4391 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_GET_GRADIENTS_H
;
4394 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_GET_GRADIENTS_V
;
4397 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE_L
;
4400 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexShadow
== 1)
4401 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE_C
;
4403 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE
;
4406 pAsm
->is_tex
= GL_TRUE
;
4407 if ( GL_TRUE
== need_barrier
)
4409 pAsm
->is_tex
= GL_TRUE
;
4410 if ( GL_TRUE
== need_barrier
)
4412 pAsm
->need_tex_barrier
= GL_TRUE
;
4414 // Set src1 to tex unit id
4415 pAsm
->S
[1].src
.reg
= pAsm
->SamplerUnits
[pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcUnit
];
4416 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4418 //No sw info from mesa compiler, so hard code here.
4419 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
4420 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
4421 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
4422 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
4424 if( GL_FALSE
== tex_dst(pAsm
) )
4429 if( GL_FALSE
== tex_src(pAsm
) )
4434 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
4436 /* hopefully did swizzles before */
4437 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4440 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
4442 /* SAMPLE dst, tmp.yxwy, CUBE */
4443 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Y
;
4444 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
4445 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_W
;
4446 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_Y
;
4449 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexShadow
== 1)
4451 /* compare value goes to w chan ? */
4452 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_Z
;
4455 if ( GL_FALSE
== next_ins(pAsm
) )
4460 /* add ARB shadow ambient but clamp to 0..1 */
4461 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexShadow
== 1)
4463 /* ADD_SAT dst, dst, ambient[texunit] */
4464 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
4466 if( GL_FALSE
== assemble_dst(pAsm
) )
4470 pAsm
->D2
.dst2
.SaturateMode
= 1;
4472 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4473 pAsm
->S
[0].src
.reg
= pAsm
->D
.dst
.reg
;
4474 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4475 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4477 pAsm
->S
[1].src
.rtype
= SRC_REG_CONSTANT
;
4478 pAsm
->S
[1].src
.reg
= pAsm
->shadow_regs
[pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcUnit
];
4479 noswizzle_PVSSRC(&(pAsm
->S
[1].src
));
4480 noneg_PVSSRC(&(pAsm
->S
[1].src
));
4482 if( GL_FALSE
== next_ins(pAsm
) )
4492 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
)
4496 if( GL_FALSE
== checkop2(pAsm
) )
4501 tmp
= gethelpr(pAsm
);
4503 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4505 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4506 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4507 pAsm
->D
.dst
.reg
= tmp
;
4508 nomask_PVSDST(&(pAsm
->D
.dst
));
4510 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4515 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4520 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
4521 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
4523 if( GL_FALSE
== next_ins(pAsm
) )
4528 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4529 pAsm
->D
.dst
.op3
= 1;
4531 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
4533 tmp
= gethelpr(pAsm
);
4535 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4536 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4537 pAsm
->D
.dst
.reg
= tmp
;
4539 nomask_PVSDST(&(pAsm
->D
.dst
));
4543 if( GL_FALSE
== assemble_dst(pAsm
) )
4549 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4554 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4559 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
4560 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
4562 // result1 + (neg) result0
4563 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
),ADDR_ABSOLUTE
);
4564 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
4565 pAsm
->S
[2].src
.reg
= tmp
;
4567 neg_PVSSRC(&(pAsm
->S
[2].src
));
4568 noswizzle_PVSSRC(&(pAsm
->S
[2].src
));
4570 if( GL_FALSE
== next_ins(pAsm
) )
4576 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
4578 if( GL_FALSE
== assemble_dst(pAsm
) )
4583 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4585 // Use tmp as source
4586 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4587 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4588 pAsm
->S
[0].src
.reg
= tmp
;
4590 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4591 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4593 if( GL_FALSE
== next_ins(pAsm
) )
4602 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
)
4607 static inline void decreaseCurrent(r700_AssemblerBase
*pAsm
, GLuint uReason
)
4612 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
--;
4615 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 4;
4618 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 4;
4621 /* TODO : for 16 vp asic, should -= 2; */
4622 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 1;
4627 static inline void checkStackDepth(r700_AssemblerBase
*pAsm
, GLuint uReason
, GLboolean bCheckMaxOnly
)
4629 if(GL_TRUE
== bCheckMaxOnly
)
4634 if((pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 1)
4635 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
4637 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
4638 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 1;
4642 if((pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 4)
4643 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
4645 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
4646 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 4;
4656 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
++;
4659 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 4;
4662 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 4;
4665 /* TODO : for 16 vp asic, should += 2; */
4666 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 1;
4670 if(pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
4671 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
4673 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
4674 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
4678 GLboolean
jumpToOffest(r700_AssemblerBase
*pAsm
, GLuint pops
, GLint offset
)
4680 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4685 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= pops
;
4686 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4687 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4689 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4690 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4691 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_JUMP
;
4692 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4694 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4696 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ offset
;
4701 GLboolean
pops(r700_AssemblerBase
*pAsm
, GLuint pops
)
4703 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4708 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= pops
;
4709 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4710 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4712 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4713 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4714 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
4716 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4718 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4719 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
4724 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
, GLboolean bHasElse
)
4726 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
4728 assemble_LOGIC_PRED(pAsm
, SQ_OP2_INST_PRED_SETNE
);
4731 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4736 if(GL_TRUE
!= bHasElse
)
4738 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
4742 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
4744 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4745 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4747 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4748 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4749 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_JUMP
;
4750 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4752 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4755 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_IF
;
4756 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= NULL
;
4757 pAsm
->fc_stack
[pAsm
->FCSP
].midLen
= 0;
4758 pAsm
->fc_stack
[pAsm
->FCSP
].first
= pAsm
->cf_current_cf_clause_ptr
;
4760 #ifndef USE_CF_FOR_POP_AFTER
4761 if(GL_TRUE
!= bHasElse
)
4763 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_POP_AFTER
;
4765 #endif /* USE_CF_FOR_POP_AFTER */
4767 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_FALSE
);
4772 GLboolean
assemble_ELSE(r700_AssemblerBase
*pAsm
)
4774 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4779 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1; ///
4780 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4781 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4783 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4784 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4785 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ELSE
;
4786 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4788 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4790 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc( (void *)pAsm
->fc_stack
[pAsm
->FCSP
].mid
,
4792 sizeof(R700ControlFlowGenericClause
*) );
4793 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[0] = pAsm
->cf_current_cf_clause_ptr
;
4794 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
4796 #ifndef USE_CF_FOR_POP_AFTER
4797 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_POP_AFTER
;
4798 #endif /* USE_CF_FOR_POP_AFTER */
4800 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
- 1;
4805 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
)
4807 #ifdef USE_CF_FOR_POP_AFTER
4809 #endif /* USE_CF_FOR_POP_AFTER */
4811 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
4813 if(NULL
== pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
4815 /* no else in between */
4816 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
;
4820 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[0]->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
;
4823 if(NULL
!= pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
4825 FREE(pAsm
->fc_stack
[pAsm
->FCSP
].mid
);
4828 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_IF
)
4830 radeon_error("if/endif in shader code are not paired. \n");
4836 decreaseCurrent(pAsm
, FC_PUSH_VPM
);
4841 GLboolean
assemble_BGNLOOP(r700_AssemblerBase
*pAsm
)
4843 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4849 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
4850 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4851 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4853 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4854 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4855 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_START_NO_AL
;
4856 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4858 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4861 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_LOOP
;
4862 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= NULL
;
4863 pAsm
->fc_stack
[pAsm
->FCSP
].unNumMid
= 0;
4864 pAsm
->fc_stack
[pAsm
->FCSP
].midLen
= 0;
4865 pAsm
->fc_stack
[pAsm
->FCSP
].first
= pAsm
->cf_current_cf_clause_ptr
;
4867 checkStackDepth(pAsm
, FC_LOOP
, GL_FALSE
);
4872 GLboolean
assemble_BRK(r700_AssemblerBase
*pAsm
)
4874 #ifdef USE_CF_FOR_CONTINUE_BREAK
4876 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
4878 assemble_LOGIC_PRED(pAsm
, SQ_OP2_INST_PRED_SETNE
);
4880 unsigned int unFCSP
;
4881 for(unFCSP
=pAsm
->FCSP
; unFCSP
>0; unFCSP
--)
4883 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
4890 radeon_error("Break is not inside loop/endloop pair.\n");
4894 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4900 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
4901 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4902 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4904 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4905 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4906 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_BREAK
;
4908 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4910 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4912 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
4913 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
4914 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
4915 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
4916 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
4917 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
4919 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4924 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
4925 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4926 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4928 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4929 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4930 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
4932 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4934 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4935 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
4937 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
4939 #endif //USE_CF_FOR_CONTINUE_BREAK
4943 GLboolean
assemble_CONT(r700_AssemblerBase
*pAsm
)
4945 #ifdef USE_CF_FOR_CONTINUE_BREAK
4946 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
4948 assemble_LOGIC_PRED(pAsm
, SQ_OP2_INST_PRED_SETNE
);
4950 unsigned int unFCSP
;
4951 for(unFCSP
=pAsm
->FCSP
; unFCSP
>0; unFCSP
--)
4953 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
4960 radeon_error("Continue is not inside loop/endloop pair.\n");
4964 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4970 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
4971 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4972 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4974 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4975 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
4976 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_CONTINUE
;
4978 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
4980 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
4982 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
4983 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
4984 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
4985 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
4986 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
4987 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
4989 if(GL_FALSE
== add_cf_instruction(pAsm
) )
4994 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
4995 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
4996 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
4998 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
4999 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5000 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
5002 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5004 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5005 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
5007 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
5009 #endif /* USE_CF_FOR_CONTINUE_BREAK */
5014 GLboolean
assemble_ENDLOOP(r700_AssemblerBase
*pAsm
)
5018 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5024 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5025 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5026 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5028 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5029 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5030 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_END
;
5031 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5033 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5035 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_uIndex
+ 1;
5036 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
5038 #ifdef USE_CF_FOR_CONTINUE_BREAK
5039 for(i
=0; i
<pAsm
->fc_stack
[pAsm
->FCSP
].unNumMid
; i
++)
5041 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[i
]->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
;
5043 if(NULL
!= pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
5045 FREE(pAsm
->fc_stack
[pAsm
->FCSP
].mid
);
5049 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_LOOP
)
5051 radeon_error("loop/endloop in shader code are not paired. \n");
5057 if((pAsm
->unCFflags
& HAS_CURRENT_LOOPRET
) > 0)
5059 for(unFCSP
=(pAsm
->FCSP
-1); unFCSP
>pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
; unFCSP
--)
5061 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5063 breakLoopOnFlag(pAsm
, unFCSP
);
5066 else if(FC_IF
== pAsm
->fc_stack
[unFCSP
].type
)
5071 if(unFCSP
<= pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
)
5073 #ifdef USE_CF_FOR_POP_AFTER
5074 returnOnFlag(pAsm
, unIF
);
5076 returnOnFlag(pAsm
, 0);
5077 #endif /* USE_CF_FOR_POP_AFTER */
5078 pAsm
->unCFflags
&= ~HAS_CURRENT_LOOPRET
;
5084 decreaseCurrent(pAsm
, FC_LOOP
);
5089 void add_return_inst(r700_AssemblerBase
*pAsm
)
5091 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5095 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5096 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5097 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5098 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5100 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5101 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5102 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_RETURN
;
5103 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5105 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5108 GLboolean
assemble_BGNSUB(r700_AssemblerBase
*pAsm
, GLint nILindex
, GLuint uiIL_Shift
)
5111 if( (pAsm
->unSubArrayPointer
+ 1) > pAsm
->unSubArraySize
)
5113 pAsm
->subs
= (SUB_OFFSET
*)_mesa_realloc( (void *)pAsm
->subs
,
5114 sizeof(SUB_OFFSET
) * pAsm
->unSubArraySize
,
5115 sizeof(SUB_OFFSET
) * (pAsm
->unSubArraySize
+ 10) );
5116 if(NULL
== pAsm
->subs
)
5120 pAsm
->unSubArraySize
+= 10;
5123 pAsm
->subs
[pAsm
->unSubArrayPointer
].subIL_Offset
= nILindex
+ uiIL_Shift
;
5124 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.pHead
=NULL
;
5125 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.pTail
=NULL
;
5126 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.uNumOfNode
=0;
5129 pAsm
->CALLSTACK
[pAsm
->CALLSP
].subDescIndex
= pAsm
->unSubArrayPointer
;
5130 pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
= pAsm
->FCSP
;
5131 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
5132 = &(pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
);
5133 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= 0;
5134 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
= 0;
5135 SetActiveCFlist(pAsm
->pR700Shader
,
5136 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
);
5138 pAsm
->unSubArrayPointer
++;
5141 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
5144 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_REP
;
5146 checkStackDepth(pAsm
, FC_REP
, GL_FALSE
);
5151 GLboolean
assemble_ENDSUB(r700_AssemblerBase
*pAsm
)
5153 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_REP
)
5155 radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
5159 /* copy max to sub structure */
5160 pAsm
->subs
[pAsm
->CALLSTACK
[pAsm
->CALLSP
].subDescIndex
].unStackDepthMax
5161 = pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
;
5163 decreaseCurrent(pAsm
, FC_REP
);
5166 SetActiveCFlist(pAsm
->pR700Shader
,
5167 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
);
5169 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
5176 GLboolean
assemble_RET(r700_AssemblerBase
*pAsm
)
5180 if(pAsm
->CALLSP
> 0)
5183 for(unFCSP
=pAsm
->FCSP
; unFCSP
>pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
; unFCSP
--)
5185 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5187 setRetInLoopFlag(pAsm
, SQ_SEL_1
);
5188 breakLoopOnFlag(pAsm
, unFCSP
);
5189 pAsm
->unCFflags
|= LOOPRET_FLAGS
;
5193 else if(FC_IF
== pAsm
->fc_stack
[unFCSP
].type
)
5200 #ifdef USE_CF_FOR_POP_AFTER
5205 #endif /* USE_CF_FOR_POP_AFTER */
5207 add_return_inst(pAsm
);
5212 GLboolean
assemble_CAL(r700_AssemblerBase
*pAsm
,
5215 GLuint uiNumberInsts
,
5216 struct prog_instruction
*pILInst
,
5217 PRESUB_DESC
* pPresubDesc
)
5221 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
5223 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5228 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.call_count
= 1;
5229 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5230 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5231 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5233 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5234 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5235 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_CALL
;
5236 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5238 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5241 if( (pAsm
->unCallerArrayPointer
+ 1) > pAsm
->unCallerArraySize
)
5243 pAsm
->callers
= (CALLER_POINTER
*)_mesa_realloc( (void *)pAsm
->callers
,
5244 sizeof(CALLER_POINTER
) * pAsm
->unCallerArraySize
,
5245 sizeof(CALLER_POINTER
) * (pAsm
->unCallerArraySize
+ 10) );
5246 if(NULL
== pAsm
->callers
)
5250 pAsm
->unCallerArraySize
+= 10;
5253 uiIL_Offset
= nILindex
+ uiIL_Shift
;
5254 pAsm
->callers
[pAsm
->unCallerArrayPointer
].subIL_Offset
= uiIL_Offset
;
5255 pAsm
->callers
[pAsm
->unCallerArrayPointer
].cf_ptr
= pAsm
->cf_current_cf_clause_ptr
;
5257 pAsm
->callers
[pAsm
->unCallerArrayPointer
].finale_cf_ptr
= NULL
;
5258 pAsm
->callers
[pAsm
->unCallerArrayPointer
].prelude_cf_ptr
= NULL
;
5260 pAsm
->unCallerArrayPointer
++;
5266 for(j
=0; j
<pAsm
->unSubArrayPointer
; j
++)
5268 if(uiIL_Offset
== pAsm
->subs
[j
].subIL_Offset
)
5269 { /* compiled before */
5271 max
= pAsm
->subs
[j
].unStackDepthMax
5272 + pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
5273 if(max
> pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5275 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= max
;
5278 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].subDescIndex
= j
;
5283 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].subDescIndex
= pAsm
->unSubArrayPointer
;
5284 unSubID
= pAsm
->unSubArrayPointer
;
5286 bRet
= AssembleInstr(nILindex
, uiIL_Shift
, uiNumberInsts
, pILInst
, pAsm
);
5290 max
= pAsm
->subs
[unSubID
].unStackDepthMax
5291 + pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
5292 if(max
> pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5294 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= max
;
5297 pAsm
->subs
[unSubID
].pPresubDesc
= pPresubDesc
;
5303 GLboolean
setRetInLoopFlag(r700_AssemblerBase
*pAsm
, GLuint flagValue
)
5305 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
5307 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
5308 pAsm
->D
.dst
.op3
= 0;
5309 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5310 pAsm
->D
.dst
.reg
= pAsm
->flag_reg_index
;
5311 pAsm
->D
.dst
.writex
= 1;
5312 pAsm
->D
.dst
.writey
= 0;
5313 pAsm
->D
.dst
.writez
= 0;
5314 pAsm
->D
.dst
.writew
= 0;
5315 pAsm
->D2
.dst2
.literal_slots
= 1;
5316 pAsm
->D2
.dst2
.SaturateMode
= SATURATE_OFF
;
5317 pAsm
->D
.dst
.predicated
= 0;
5318 /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
5319 pAsm
->D
.dst
.math
= 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
5320 pAsm
->D2
.dst2
.index_mode
= SQ_INDEX_LOOP
; /* Check this ! */
5322 pAsm
->S
[0].src
.rtype
= SRC_REC_LITERAL
;
5323 //pAsm->S[0].src.reg = 0;
5324 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5325 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5326 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
5327 pAsm
->S
[0].src
.swizzley
= SQ_SEL_Y
;
5328 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_Z
;
5329 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_W
;
5331 if( GL_FALSE
== next_ins_literal(pAsm
, &(fLiteral
[0])) )
5336 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
5337 pAsm
->S
[0].src
.reg
= 0;
5338 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5339 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5340 pAsm
->S
[0].src
.swizzlex
= flagValue
;
5341 pAsm
->S
[0].src
.swizzley
= flagValue
;
5342 pAsm
->S
[0].src
.swizzlez
= flagValue
;
5343 pAsm
->S
[0].src
.swizzlew
= flagValue
;
5345 if( GL_FALSE
== next_ins(pAsm
) )
5354 GLboolean
testFlag(r700_AssemblerBase
*pAsm
)
5356 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
5359 GLuint tmp
= gethelpr(pAsm
);
5360 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5362 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_PRED_SETE
;
5363 pAsm
->D
.dst
.math
= 1;
5364 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5365 pAsm
->D
.dst
.reg
= tmp
;
5366 pAsm
->D
.dst
.writex
= 1;
5367 pAsm
->D
.dst
.writey
= 0;
5368 pAsm
->D
.dst
.writez
= 0;
5369 pAsm
->D
.dst
.writew
= 0;
5370 pAsm
->D2
.dst2
.literal_slots
= 1;
5371 pAsm
->D2
.dst2
.SaturateMode
= SATURATE_OFF
;
5372 pAsm
->D
.dst
.predicated
= 1;
5373 pAsm
->D2
.dst2
.index_mode
= SQ_INDEX_LOOP
; /* Check this ! */
5375 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
5376 pAsm
->S
[0].src
.reg
= pAsm
->flag_reg_index
;
5377 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5378 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5379 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
5380 pAsm
->S
[0].src
.swizzley
= SQ_SEL_Y
;
5381 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_Z
;
5382 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_W
;
5384 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
5385 //pAsm->S[1].src.reg = 0;
5386 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
5387 noneg_PVSSRC(&(pAsm
->S
[1].src
));
5388 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
5389 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
5390 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
5391 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
5393 if( GL_FALSE
== next_ins_literal(pAsm
, &(fLiteral
[0])) )
5398 pAsm
->S
[1].src
.rtype
= DST_REG_TEMPORARY
;
5399 pAsm
->S
[1].src
.reg
= 0;
5400 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
5401 noneg_PVSSRC(&(pAsm
->S
[1].src
));
5402 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_1
;
5403 pAsm
->S
[1].src
.swizzley
= SQ_SEL_1
;
5404 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_1
;
5405 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_1
;
5407 if( GL_FALSE
== next_ins(pAsm
) )
5413 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
5418 GLboolean
returnOnFlag(r700_AssemblerBase
*pAsm
, GLuint unIF
)
5421 jumpToOffest(pAsm
, 1, 4);
5422 setRetInLoopFlag(pAsm
, SQ_SEL_0
);
5423 pops(pAsm
, unIF
+ 1);
5424 add_return_inst(pAsm
);
5429 GLboolean
breakLoopOnFlag(r700_AssemblerBase
*pAsm
, GLuint unFCSP
)
5434 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5439 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5440 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5441 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5443 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5444 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5445 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_BREAK
;
5446 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5448 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5450 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
5451 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
5452 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
5453 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
5454 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
5455 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
/*
 * AssembleInstr
 *
 * Walks the IL instruction array pILInst from index uiFirstInst up to (but not
 * including) index uiNumberInsts and dispatches every instruction, by its
 * Opcode, to the matching assemble_* routine.
 *
 * Before dispatching, the instruction array and the current index are stored
 * in pR700AsmCode (pILInst / uiCurInst), and when an instruction has
 * CondUpdate == 1 its DstReg.Index is remembered in last_cond_register.
 *
 * Note that the loop bound is "i < uiNumberInsts": the value is compared
 * against the absolute index i, not against a count relative to uiFirstInst.
 */
5462 GLboolean
AssembleInstr(GLuint uiFirstInst
,
5464 GLuint uiNumberInsts
,
5465 struct prog_instruction
*pILInst
,
5466 r700_AssemblerBase
*pR700AsmCode
)
5470 pR700AsmCode
->pILInst
= pILInst
;
5471 for(i
=uiFirstInst
; i
<uiNumberInsts
; i
++)
5473 pR700AsmCode
->uiCurInst
= i
;
/* USE_CF_FOR_CONTINUE_BREAK is #defined near the top of this file, so the
 * code guarded by this #ifndef (inverting the comparison opcode that
 * precedes an OPCODE_BRK) is normally compiled out. */
5475 #ifndef USE_CF_FOR_CONTINUE_BREAK
5476 if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
5478 switch(pILInst
[i
].Opcode
)
5481 pILInst
[i
].Opcode
= OPCODE_SGT
;
5484 pILInst
[i
].Opcode
= OPCODE_SGE
;
5487 pILInst
[i
].Opcode
= OPCODE_SLT
;
5490 pILInst
[i
].Opcode
= OPCODE_SLE
;
5493 pILInst
[i
].Opcode
= OPCODE_SNE
;
5496 pILInst
[i
].Opcode
= OPCODE_SEQ
;
5503 if(pILInst
[i
].CondUpdate
== 1)
5505 /* remember dest register used for cond evaluation */
5506 /* XXX also handle PROGRAM_OUTPUT registers here? */
5507 pR700AsmCode
->last_cond_register
= pILInst
[i
].DstReg
.Index
;
/* Main dispatch: one assemble_* call per IL opcode. */
5510 switch (pILInst
[i
].Opcode
)
5513 if ( GL_FALSE
== assemble_ABS(pR700AsmCode
) )
5518 if ( GL_FALSE
== assemble_ADD(pR700AsmCode
) )
5523 if ( GL_FALSE
== assemble_ARL(pR700AsmCode
) )
5527 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
5528 //if ( GL_FALSE == assemble_BAD("ARR") )
5533 if ( GL_FALSE
== assemble_CMP(pR700AsmCode
) )
5537 if ( GL_FALSE
== assemble_TRIG(pR700AsmCode
, SQ_OP2_INST_COS
) )
5544 if ( GL_FALSE
== assemble_DOT(pR700AsmCode
) )
5549 if ( GL_FALSE
== assemble_DST(pR700AsmCode
) )
5554 if ( GL_FALSE
== assemble_EX2(pR700AsmCode
) )
5558 if ( GL_FALSE
== assemble_EXP(pR700AsmCode
) )
5563 if ( GL_FALSE
== assemble_FLR(pR700AsmCode
) )
5566 //case OP_FLR_INT: ;
5568 // if ( GL_FALSE == assemble_FLR_INT() )
5573 if ( GL_FALSE
== assemble_FRC(pR700AsmCode
) )
5579 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
, SQ_OP2_INST_KILLGT
) )
5583 if ( GL_FALSE
== assemble_LG2(pR700AsmCode
) )
5587 if ( GL_FALSE
== assemble_LIT(pR700AsmCode
) )
5591 if ( GL_FALSE
== assemble_LRP(pR700AsmCode
) )
5595 if ( GL_FALSE
== assemble_LOG(pR700AsmCode
) )
5600 if ( GL_FALSE
== assemble_MAD(pR700AsmCode
) )
5604 if ( GL_FALSE
== assemble_MAX(pR700AsmCode
) )
5608 if ( GL_FALSE
== assemble_MIN(pR700AsmCode
) )
5613 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
5617 if ( GL_FALSE
== assemble_MUL(pR700AsmCode
) )
/* NOTE(review): the destination passed to callPreSub below is computed from
 * pILInst->DstReg, i.e. the FIRST instruction of the array, whereas every
 * other access in this function uses pILInst[i]. This looks like it should
 * be pILInst[i].DstReg.Index -- confirm. The callPreSub return value is not
 * tested here either. */
5623 callPreSub(pR700AsmCode
,
5626 pILInst
->DstReg
.Index
+ pR700AsmCode
->starting_temp_register_number
,
5628 radeon_error("noise1: not yet supported shader instruction\n");
5632 radeon_error("noise2: not yet supported shader instruction\n");
5635 radeon_error("noise3: not yet supported shader instruction\n");
5638 radeon_error("noise4: not yet supported shader instruction\n");
5642 if ( GL_FALSE
== assemble_POW(pR700AsmCode
) )
5646 if ( GL_FALSE
== assemble_RCP(pR700AsmCode
) )
5650 if ( GL_FALSE
== assemble_RSQ(pR700AsmCode
) )
5654 if ( GL_FALSE
== assemble_TRIG(pR700AsmCode
, SQ_OP2_INST_SIN
) )
5658 if ( GL_FALSE
== assemble_SCS(pR700AsmCode
) )
5663 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETE
) )
5670 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGT
) )
5677 if ( GL_FALSE
== assemble_SGE(pR700AsmCode
) )
5683 /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
/* The next two stanzas implement the "reverse the sources" approach: the two
 * source operands of the IL instruction are swapped in place, SETGT (first
 * stanza) or SETGE (second stanza) is assembled, and the saved operands are
 * written back. The write-back appears twice per stanza -- presumably once on
 * the failure path and once on the success path. */
5686 struct prog_src_register SrcRegSave
[2];
5687 SrcRegSave
[0] = pILInst
[i
].SrcReg
[0];
5688 SrcRegSave
[1] = pILInst
[i
].SrcReg
[1];
5689 pILInst
[i
].SrcReg
[0] = SrcRegSave
[1];
5690 pILInst
[i
].SrcReg
[1] = SrcRegSave
[0];
5691 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGT
) )
5693 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
5694 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
5697 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
5698 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
5704 struct prog_src_register SrcRegSave
[2];
5705 SrcRegSave
[0] = pILInst
[i
].SrcReg
[0];
5706 SrcRegSave
[1] = pILInst
[i
].SrcReg
[1];
5707 pILInst
[i
].SrcReg
[0] = SrcRegSave
[1];
5708 pILInst
[i
].SrcReg
[1] = SrcRegSave
[0];
5709 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGE
) )
5711 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
5712 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
5715 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
5716 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
5721 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETNE
) )
5728 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
5733 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
/* Look ahead one instruction (bounds-checked against uiNumberInsts): when the
 * following instruction is not OPCODE_END and is a texture instruction, its
 * destination-dependency slot in pInstDeps is set to i+1. */
5739 if( (i
+1)<uiNumberInsts
)
5741 if(OPCODE_END
!= pILInst
[i
+1].Opcode
)
5743 if( GL_TRUE
== IsTex(pILInst
[i
+1].Opcode
) )
5745 pR700AsmCode
->pInstDeps
[i
+1].nDstDep
= i
+1; //=1?
5756 if ( GL_FALSE
== assemble_TEX(pR700AsmCode
) )
5761 if ( GL_FALSE
== assemble_math_function(pR700AsmCode
, SQ_OP2_INST_TRUNC
) )
5766 if ( GL_FALSE
== assemble_XPD(pR700AsmCode
) )
/* IF: bHasElse tells assemble_IF whether the instruction at this IF's
 * BranchTarget is an OPCODE_ELSE. */
5772 GLboolean bHasElse
= GL_FALSE
;
5774 if(pILInst
[pILInst
[i
].BranchTarget
].Opcode
== OPCODE_ELSE
)
5779 if ( GL_FALSE
== assemble_IF(pR700AsmCode
, bHasElse
) )
5787 if ( GL_FALSE
== assemble_ELSE(pR700AsmCode
) )
5792 if ( GL_FALSE
== assemble_ENDIF(pR700AsmCode
) )
5796 case OPCODE_BGNLOOP
:
5797 if( GL_FALSE
== assemble_BGNLOOP(pR700AsmCode
) )
5804 if( GL_FALSE
== assemble_BRK(pR700AsmCode
) )
5811 if( GL_FALSE
== assemble_CONT(pR700AsmCode
) )
5817 case OPCODE_ENDLOOP
:
5818 if( GL_FALSE
== assemble_ENDLOOP(pR700AsmCode
) )
5825 if( GL_FALSE
== assemble_BGNSUB(pR700AsmCode
, i
, uiIL_Shift
) )
5832 if( GL_FALSE
== assemble_RET(pR700AsmCode
) )
5839 if( GL_FALSE
== assemble_CAL(pR700AsmCode
,
5840 pILInst
[i
].BranchTarget
,
5850 //case OPCODE_EXPORT:
5851 // if ( GL_FALSE == assemble_EXPORT() )
/* The end-of-subroutine case returns straight out of AssembleInstr with the
 * result of assemble_ENDSUB; instructions after it are not visited by this
 * call. */
5856 return assemble_ENDSUB(pR700AsmCode
);
5859 //pR700AsmCode->uiCurInst = i;
5860 //This is to remaind that if in later exoort there is depth/stencil
5861 //export, we need a mov to re-arrange DST channel, where using a
5862 //psuedo inst, we will use this end inst to do it.
5866 radeon_error("internal: unknown instruction\n");
5874 GLboolean
InitShaderProgram(r700_AssemblerBase
* pAsm
)
5876 setRetInLoopFlag(pAsm
, SQ_SEL_0
);
5877 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
/*
 * RelocProgram
 *
 * Post-assembly fix-up pass that merges every subroutine's control-flow (CF)
 * list into the main CF list and rebases addresses and register/constant
 * selectors. In order, the visible code:
 *
 *  1. If HAS_LOOPRET is not set in pAsm->unCFflags, finds a SIT_CF_ALU clause
 *     in the main list and removes the flag initialisation from it: a clause
 *     whose count field is 0 is turned into SQ_CF_INST_NOP, otherwise the
 *     clause is re-linked to its second ALU instruction and count is
 *     decremented.
 *  2. Derives pR700Shader->uStackSize from CALLSTACK[0].max when that is > 0.
 *  3. For each entry of pAsm->subs: records the sub's CF offset, adds that
 *     offset to the addr field of the listed branch/loop CF instructions and
 *     to every instruction's m_uIndex, and -- for subs that have a
 *     pPresubDesc -- rebases dst/src GPR selectors by
 *     (unRegOffset - unMinRegIndex) and constant-file selectors by
 *     unConstOffset. The sub's CF list is then appended to the main list.
 *  4. For each entry of pAsm->callers: stores the callee's CF offset in the
 *     caller CF instruction's addr field and rebases the destination GPRs of
 *     the prelude clause and the src0 GPRs of the finale clause.
 */
5881 GLboolean
RelocProgram(r700_AssemblerBase
* pAsm
, struct gl_program
* pILProg
)
5885 TypedShaderList
* plstCFmain
;
5886 TypedShaderList
* plstCFsub
;
5888 R700ShaderInstruction
* pInst
;
5889 R700ControlFlowGenericClause
* pCFInst
;
5891 R700ControlFlowALUClause
* pCF_ALU
;
5892 R700ALUInstruction
* pALU
;
5893 GLuint unConstOffset
= 0;
5895 GLuint unMinRegIndex
;
5897 plstCFmain
= pAsm
->CALLSTACK
[0].plstCFInstructions_local
;
5899 /* remove flags init if they are not used */
5900 if((pAsm
->unCFflags
& HAS_LOOPRET
) == 0)
5902 R700ControlFlowALUClause
* pCF_ALU
;
5903 pInst
= plstCFmain
->pHead
;
5906 if(SIT_CF_ALU
== pInst
->m_ShaderInstType
)
5908 pCF_ALU
= (R700ControlFlowALUClause
*)pInst
;
5909 if(0 == pCF_ALU
->m_Word1
.f
.count
)
5911 pCF_ALU
->m_Word1
.f
.cf_inst
= SQ_CF_INST_NOP
;
/* Otherwise drop the first ALU instruction of the clause: detach it from the
 * clause, make the following ALU instruction the clause's first one, and
 * shrink the count. The next instruction is dereferenced without a NULL
 * check; a non-zero count is what makes that safe here. */
5915 R700ALUInstruction
* pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
5917 pALU
->m_pLinkedALUClause
= NULL
;
5918 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
5919 pALU
->m_pLinkedALUClause
= pCF_ALU
;
5920 pCF_ALU
->m_pLinkedALUInstruction
= pALU
;
5922 pCF_ALU
->m_Word1
.f
.count
--;
5926 pInst
= pInst
->pNextInst
;
/* Stack size: CALLSTACK[0].max rounded up to a multiple of 4 and divided by
 * 4, plus 2. */
5930 if(pAsm
->CALLSTACK
[0].max
> 0)
5932 pAsm
->pR700Shader
->uStackSize
= ((pAsm
->CALLSTACK
[0].max
+ 3)>>2) + 2;
5935 if(0 == pAsm
->unSubArrayPointer
)
/* Subs are laid out after the main program: the first sub starts at the
 * current node count of the main CF list, and constants start after the
 * program's own parameters (when it has a parameter list). */
5940 unCFoffset
= plstCFmain
->uNumOfNode
;
5942 if(NULL
!= pILProg
->Parameters
)
5944 unConstOffset
= pILProg
->Parameters
->NumParameters
;
5948 for(i
=0; i
<pAsm
->unSubArrayPointer
; i
++)
5950 pAsm
->subs
[i
].unCFoffset
= unCFoffset
;
5951 plstCFsub
= &(pAsm
->subs
[i
].lstCFInstructions_local
);
5953 pInst
= plstCFsub
->pHead
;
5955 /* reloc instructions */
5958 if(SIT_CF_GENERIC
== pInst
->m_ShaderInstType
)
5960 pCFInst
= (R700ControlFlowGenericClause
*)pInst
;
5962 switch (pCFInst
->m_Word1
.f
.cf_inst
)
5964 case SQ_CF_INST_POP
:
5965 case SQ_CF_INST_JUMP
:
5966 case SQ_CF_INST_ELSE
:
5967 case SQ_CF_INST_LOOP_END
:
5968 case SQ_CF_INST_LOOP_START
:
5969 case SQ_CF_INST_LOOP_START_NO_AL
:
5970 case SQ_CF_INST_LOOP_CONTINUE
:
5971 case SQ_CF_INST_LOOP_BREAK
:
5972 pCFInst
->m_Word0
.f
.addr
+= unCFoffset
;
5979 pInst
->m_uIndex
+= unCFoffset
;
5981 pInst
= pInst
->pNextInst
;
5984 if(NULL
!= pAsm
->subs
[i
].pPresubDesc
)
5988 unMinRegIndex
= pAsm
->subs
[i
].pPresubDesc
->pCompiledSub
->MinRegIndex
;
5989 unRegOffset
= pAsm
->subs
[i
].pPresubDesc
->maxStartReg
;
/* NOTE(review): unConstOffset is accumulated with += on every presub of this
 * loop, while callPreSub already computes unConstantsStart cumulatively from
 * the previous presub. With more than two presubs this may count earlier
 * constants twice -- confirm the intended offset. */
5990 unConstOffset
+= pAsm
->subs
[i
].pPresubDesc
->unConstantsStart
;
5992 pInst
= plstCFsub
->pHead
;
5995 if(SIT_CF_ALU
== pInst
->m_ShaderInstType
)
5997 pCF_ALU
= (R700ControlFlowALUClause
*)pInst
;
5999 pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
/* The bound is "j <= count", so count + 1 ALU instructions are visited per
 * clause. */
6000 for(int j
=0; j
<=pCF_ALU
->m_Word1
.f
.count
; j
++)
6002 pALU
->m_Word1
.f
.dst_gpr
= pALU
->m_Word1
.f
.dst_gpr
+ unRegOffset
- unMinRegIndex
;
/* Source selectors below SQ_ALU_SRC_GPR_SIZE are treated as GPRs and rebased
 * like the destination; selectors at or above SQ_ALU_SRC_CFILE_BASE are
 * treated as constant-file entries and shifted by unConstOffset. Values in
 * between are left untouched. */
6004 if(pALU
->m_Word0
.f
.src0_sel
< SQ_ALU_SRC_GPR_SIZE
)
6006 pALU
->m_Word0
.f
.src0_sel
= pALU
->m_Word0
.f
.src0_sel
+ unRegOffset
- unMinRegIndex
;
6008 else if(pALU
->m_Word0
.f
.src0_sel
>= SQ_ALU_SRC_CFILE_BASE
)
6010 pALU
->m_Word0
.f
.src0_sel
+= unConstOffset
;
6013 if( ((pALU
->m_Word1
.val
>> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT
) & 0x0000001F)
6014 >= SQ_OP3_INST_MUL_LIT
)
6015 { /* op3 : 3 srcs */
6016 if(pALU
->m_Word1_OP3
.f
.src2_sel
< SQ_ALU_SRC_GPR_SIZE
)
6018 pALU
->m_Word1_OP3
.f
.src2_sel
= pALU
->m_Word1_OP3
.f
.src2_sel
+ unRegOffset
- unMinRegIndex
;
6020 else if(pALU
->m_Word1_OP3
.f
.src2_sel
>= SQ_ALU_SRC_CFILE_BASE
)
6022 pALU
->m_Word1_OP3
.f
.src2_sel
+= unConstOffset
;
6024 if(pALU
->m_Word0
.f
.src1_sel
< SQ_ALU_SRC_GPR_SIZE
)
6026 pALU
->m_Word0
.f
.src1_sel
= pALU
->m_Word0
.f
.src1_sel
+ unRegOffset
- unMinRegIndex
;
6028 else if(pALU
->m_Word0
.f
.src1_sel
>= SQ_ALU_SRC_CFILE_BASE
)
6030 pALU
->m_Word0
.f
.src1_sel
+= unConstOffset
;
/* Two-operand form: the operand count is looked up from the OP2 opcode, read
 * through either the f6 or the f bitfield layout (the condition choosing
 * between them is not visible in this excerpt). */
6037 uNumSrc
= r700GetNumOperands(pALU
->m_Word1_OP2
.f6
.alu_inst
, 0);
6041 uNumSrc
= r700GetNumOperands(pALU
->m_Word1_OP2
.f
.alu_inst
, 0);
6045 if(pALU
->m_Word0
.f
.src1_sel
< SQ_ALU_SRC_GPR_SIZE
)
6047 pALU
->m_Word0
.f
.src1_sel
= pALU
->m_Word0
.f
.src1_sel
+ unRegOffset
- unMinRegIndex
;
6049 else if(pALU
->m_Word0
.f
.src1_sel
>= SQ_ALU_SRC_CFILE_BASE
)
6051 pALU
->m_Word0
.f
.src1_sel
+= unConstOffset
;
6055 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
6058 pInst
= pInst
->pNextInst
;
6062 /* Put sub into main */
6063 plstCFmain
->pTail
->pNextInst
= plstCFsub
->pHead
;
6064 plstCFmain
->pTail
= plstCFsub
->pTail
;
6065 plstCFmain
->uNumOfNode
+= plstCFsub
->uNumOfNode
;
6067 unCFoffset
+= plstCFsub
->uNumOfNode
;
/* Second pass: now that every sub has its final CF offset, patch each
 * recorded caller to point at its callee and rebase the register numbers used
 * by the caller's prelude and finale clauses. */
6071 for(i
=0; i
<pAsm
->unCallerArrayPointer
; i
++)
6073 pAsm
->callers
[i
].cf_ptr
->m_Word0
.f
.addr
6074 = pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].unCFoffset
;
6076 if(NULL
!= pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].pPresubDesc
)
6078 unMinRegIndex
= pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].pPresubDesc
->pCompiledSub
->MinRegIndex
;
6079 unRegOffset
= pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].pPresubDesc
->maxStartReg
;
6081 if(NULL
!= pAsm
->callers
[i
].prelude_cf_ptr
)
6083 pCF_ALU
= (R700ControlFlowALUClause
* )(pAsm
->callers
[i
].prelude_cf_ptr
);
6084 pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
6085 for(int j
=0; j
<=pCF_ALU
->m_Word1
.f
.count
; j
++)
6087 pALU
->m_Word1
.f
.dst_gpr
= pALU
->m_Word1
.f
.dst_gpr
+ unRegOffset
- unMinRegIndex
;
6088 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
6091 if(NULL
!= pAsm
->callers
[i
].finale_cf_ptr
)
6093 pCF_ALU
= (R700ControlFlowALUClause
* )(pAsm
->callers
[i
].finale_cf_ptr
);
6094 pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
6095 for(int j
=0; j
<=pCF_ALU
->m_Word1
.f
.count
; j
++)
6097 pALU
->m_Word0
.f
.src0_sel
= pALU
->m_Word0
.f
.src0_sel
+ unRegOffset
- unMinRegIndex
;
6098 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
/*
 * callPreSub
 *
 * Emits a call to a pre-compiled ("presub") built-in subroutine.
 *
 *  - For each of the uNumValidSrc sources, sets up a full-writemask MOV whose
 *    destination is the temporary pCompiledSub->srcRegIndex[i] and whose
 *    source comes from assemble_src(pAsm, i, 0). The ALU clause holding these
 *    MOVs is remembered as the "prelude".
 *  - Searches pAsm->presubs for an entry whose sptSigniture equals
 *    scriptSigniture. When none exists, a new PRESUB_DESC is appended (the
 *    array grows in steps of 4), the assembler's register/instruction context
 *    is saved, replaced by one sized from pCompiledSub, the sub is assembled
 *    through assemble_CAL, and the context is restored. When one exists,
 *    assemble_CAL is invoked with the stored descriptor values.
 *  - Sets up a MOV from the temporary pCompiledSub->dstRegIndex (with the
 *    sub's output swizzles) to the real destination prepared by
 *    assemble_dst(), and records the prelude/finale clause pointers in the
 *    most recently added pAsm->callers entry.
 *  - Finally raises number_used_registers and the descriptor's maxStartReg
 *    when this call site needs more than previously recorded.
 */
6107 GLboolean
callPreSub(r700_AssemblerBase
* pAsm
,
6108 LOADABLE_SCRIPT_SIGNITURE scriptSigniture
,
6109 COMPILED_SUB
* pCompiledSub
,
6111 GLshort uNumValidSrc
)
6113 /* save assemble context */
6114 GLuint starting_temp_register_number_save
;
6115 GLuint number_used_registers_save
;
6116 GLuint uFirstHelpReg_save
;
6117 GLuint uHelpReg_save
;
6118 GLuint uiCurInst_save
;
6119 struct prog_instruction
*pILInst_save
;
6120 PRESUB_DESC
* pPresubDesc
;
6124 R700ControlFlowGenericClause
* prelude_cf_ptr
= NULL
;
6126 /* copy srcs to presub inputs */
6127 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
6128 for(i
=0; i
<uNumValidSrc
; i
++)
6130 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
6131 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
6132 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
6133 pAsm
->D
.dst
.reg
= pCompiledSub
->srcRegIndex
[i
];
6134 pAsm
->D
.dst
.writex
= 1;
6135 pAsm
->D
.dst
.writey
= 1;
6136 pAsm
->D
.dst
.writez
= 1;
6137 pAsm
->D
.dst
.writew
= 1;
6139 if( GL_FALSE
== assemble_src(pAsm
, i
, 0) )
/* Only when at least one source was copied is there a prelude clause to
 * remember; otherwise prelude_cf_ptr stays NULL. */
6146 if(uNumValidSrc
> 0)
6148 prelude_cf_ptr
= pAsm
->cf_current_alu_clause_ptr
;
6149 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
6152 /* browse thro existing presubs. */
6153 for(i
=0; i
<pAsm
->unNumPresub
; i
++)
6155 if(pAsm
->presubs
[i
].sptSigniture
== scriptSigniture
)
/* i == unNumPresub means the search above ran to the end without a match. */
6161 if(i
== pAsm
->unNumPresub
)
6162 { /* not loaded yet */
6163 /* save assemble context */
6164 number_used_registers_save
= pAsm
->number_used_registers
;
6165 uFirstHelpReg_save
= pAsm
->uFirstHelpReg
;
6166 uHelpReg_save
= pAsm
->uHelpReg
;
6167 starting_temp_register_number_save
= pAsm
->starting_temp_register_number
;
6168 pILInst_save
= pAsm
->pILInst
;
6169 uiCurInst_save
= pAsm
->uiCurInst
;
6171 /* alloc in presub */
/* NOTE(review): the _mesa_realloc result is assigned directly to
 * pAsm->presubs, so if the reallocation fails the previous array pointer is
 * overwritten with NULL and can no longer be freed. */
6172 if( (pAsm
->unNumPresub
+ 1) > pAsm
->unPresubArraySize
)
6174 pAsm
->presubs
= (PRESUB_DESC
*)_mesa_realloc( (void *)pAsm
->presubs
,
6175 sizeof(PRESUB_DESC
) * pAsm
->unPresubArraySize
,
6176 sizeof(PRESUB_DESC
) * (pAsm
->unPresubArraySize
+ 4) );
6177 if(NULL
== pAsm
->presubs
)
6179 radeon_error("No memeory to allocate built in shader function description structures. \n");
6182 pAsm
->unPresubArraySize
+= 4;
6185 pPresubDesc
= &(pAsm
->presubs
[i
]);
6186 pPresubDesc
->sptSigniture
= scriptSigniture
;
6188 /* constants offsets need to be final resolved at reloc. */
/* The first presub starts at constant 0; each later one starts right after
 * the previous presub's constants (its start plus its NumParameters). */
6189 if(0 == pAsm
->unNumPresub
)
6191 pPresubDesc
->unConstantsStart
= 0;
6195 pPresubDesc
->unConstantsStart
= pAsm
->presubs
[i
-1].unConstantsStart
6196 + pAsm
->presubs
[i
-1].pCompiledSub
->NumParameters
;
6199 pPresubDesc
->pCompiledSub
= pCompiledSub
;
6201 pPresubDesc
->subIL_Shift
= pAsm
->unCurNumILInsts
;
6202 pPresubDesc
->maxStartReg
= uFirstHelpReg_save
;
6203 pAsm
->unCurNumILInsts
+= pCompiledSub
->NumInstructions
;
6205 pAsm
->unNumPresub
++;
6207 /* setup new assemble context */
6208 pAsm
->starting_temp_register_number
= 0;
6209 pAsm
->number_used_registers
= pCompiledSub
->NumTemporaries
;
6210 pAsm
->uFirstHelpReg
= pAsm
->number_used_registers
;
6211 pAsm
->uHelpReg
= pAsm
->uFirstHelpReg
;
6213 bRet
= assemble_CAL(pAsm
,
6215 pPresubDesc
->subIL_Shift
,
6216 pCompiledSub
->NumInstructions
,
6217 pCompiledSub
->Instructions
,
/* Remember how many registers the sub ended up using before the caller's
 * context is put back. */
6221 pPresubDesc
->number_used_registers
= pAsm
->number_used_registers
;
6223 /* restore assemble context */
6224 pAsm
->number_used_registers
= number_used_registers_save
;
6225 pAsm
->uFirstHelpReg
= uFirstHelpReg_save
;
6226 pAsm
->uHelpReg
= uHelpReg_save
;
6227 pAsm
->starting_temp_register_number
= starting_temp_register_number_save
;
6228 pAsm
->pILInst
= pILInst_save
;
6229 pAsm
->uiCurInst
= uiCurInst_save
;
/* Already-registered presub: reuse its descriptor, no context switch. */
6233 pPresubDesc
= &(pAsm
->presubs
[i
]);
6235 bRet
= assemble_CAL(pAsm
,
6237 pPresubDesc
->subIL_Shift
,
6238 pCompiledSub
->NumInstructions
,
6239 pCompiledSub
->Instructions
,
6243 if(GL_FALSE
== bRet
)
6245 radeon_error("Shader presub assemble failed. \n");
6249 /* copy presub output to real dst */
6250 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
6251 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
6253 if( GL_FALSE
== assemble_dst(pAsm
) )
6258 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
6259 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
6260 pAsm
->S
[0].src
.reg
= pCompiledSub
->dstRegIndex
;
6261 pAsm
->S
[0].src
.swizzlex
= pCompiledSub
->outputSwizzleX
;
6262 pAsm
->S
[0].src
.swizzley
= pCompiledSub
->outputSwizzleY
;
6263 pAsm
->S
[0].src
.swizzlez
= pCompiledSub
->outputSwizzleZ
;
6264 pAsm
->S
[0].src
.swizzlew
= pCompiledSub
->outputSwizzleW
;
/* The caller entry at index unCallerArrayPointer - 1 is the most recently
 * added one (presumably appended by the assemble_CAL call above -- confirm).
 * RelocProgram later uses these two pointers to rebase register numbers. */
6268 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].finale_cf_ptr
= pAsm
->cf_current_alu_clause_ptr
;
6269 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].prelude_cf_ptr
= prelude_cf_ptr
;
6270 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
/* Make sure the caller's register budget covers the sub's registers placed
 * after the caller's first help register, and track the highest start
 * register any call site of this presub needs. */
6273 if( (pPresubDesc
->number_used_registers
+ pAsm
->uFirstHelpReg
) > pAsm
->number_used_registers
)
6275 pAsm
->number_used_registers
= pPresubDesc
->number_used_registers
+ pAsm
->uFirstHelpReg
;
6277 if(pAsm
->uFirstHelpReg
> pPresubDesc
->maxStartReg
)
6279 pPresubDesc
->maxStartReg
= pAsm
->uFirstHelpReg
;
6285 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
6287 GLuint export_starting_index
,
6288 GLuint export_count
,
6289 GLuint starting_register_number
,
6290 GLboolean is_depth_export
)
6292 unsigned char ucWriteMask
;
6294 check_current_clause(pAsm
, CF_EMPTY_CLAUSE
);
6295 check_current_clause(pAsm
, CF_EXPORT_CLAUSE
); //alloc the cf_current_export_clause_ptr
6297 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.type
= type
;
6301 case SQ_EXPORT_PIXEL
:
6302 if(GL_TRUE
== is_depth_export
)
6304 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_Z
;
6308 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_MRT0
+ export_starting_index
;
6313 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_POS_0
+ export_starting_index
;
6316 case SQ_EXPORT_PARAM
:
6317 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= 0x0 + export_starting_index
;
6321 radeon_error("Unknown export type: %d\n", type
);
6326 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_gpr
= starting_register_number
;
6328 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_rel
= SQ_ABSOLUTE
;
6329 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.index_gpr
= 0x0;
6330 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.elem_size
= 0x3;
6332 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.burst_count
= (export_count
- 1);
6333 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6334 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6335 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT
; // _DONE
6336 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6337 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
6339 if (export_count
== 1)
6341 ucWriteMask
= pAsm
->pucOutMask
[starting_register_number
- pAsm
->starting_export_register_number
];
6342 /* exports Z as a float into Red channel */
6343 if (GL_TRUE
== is_depth_export
)
6346 if( (ucWriteMask
& 0x1) != 0)
6348 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
6352 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_MASK
;
6354 if( ((ucWriteMask
>>1) & 0x1) != 0)
6356 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
6360 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_MASK
;
6362 if( ((ucWriteMask
>>2) & 0x1) != 0)
6364 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
6368 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_MASK
;
6370 if( ((ucWriteMask
>>3) & 0x1) != 0)
6372 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
6376 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_MASK
;
6381 // This should only be used if all components for all registers have been written
6382 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
6383 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
6384 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
6385 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
6388 pAsm
->cf_last_export_ptr
= pAsm
->cf_current_export_clause_ptr
;
6393 GLboolean
Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase
*pAsm
, BITS depth_channel_select
)
6395 gl_inst_opcode Opcode_save
= pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
; //Should be OPCODE_END
6396 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= OPCODE_MOV
;
6398 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
6400 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
6402 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
6403 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
6404 pAsm
->D
.dst
.reg
= pAsm
->depth_export_register_number
;
6406 pAsm
->D
.dst
.writex
= 1; // depth goes in R channel for HW
6408 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
6409 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
6410 pAsm
->S
[0].src
.reg
= pAsm
->depth_export_register_number
;
6412 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), depth_channel_select
);
6414 noneg_PVSSRC(&(pAsm
->S
[0].src
));
6416 if( GL_FALSE
== next_ins(pAsm
) )
6421 pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
= Opcode_save
;
6426 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
,
6427 GLbitfield OutputsWritten
)
6430 GLuint export_count
= 0;
6432 if(pR700AsmCode
->depth_export_register_number
>= 0)
6434 if( GL_FALSE
== Move_Depth_Exports_To_Correct_Channels(pR700AsmCode
, SQ_SEL_Z
) ) // depth
6440 unBit
= 1 << FRAG_RESULT_COLOR
;
6441 if(OutputsWritten
& unBit
)
6443 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6447 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_COLOR
],
6454 unBit
= 1 << FRAG_RESULT_DEPTH
;
6455 if(OutputsWritten
& unBit
)
6457 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6461 pR700AsmCode
->uiFP_OutputMap
[FRAG_RESULT_DEPTH
],
6468 /* Need to export something, otherwise we'll hang
6469 * results are undefined anyway */
6470 if(export_count
== 0)
6472 Process_Export(pR700AsmCode
, SQ_EXPORT_PIXEL
, 0, 1, 0, GL_FALSE
);
6475 if(pR700AsmCode
->cf_last_export_ptr
!= NULL
)
6477 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6478 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
6484 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
,
6485 GLbitfield OutputsWritten
)
6490 GLuint export_starting_index
= 0;
6491 GLuint export_count
= pR700AsmCode
->number_of_exports
;
6493 unBit
= 1 << VERT_RESULT_HPOS
;
6494 if(OutputsWritten
& unBit
)
6496 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6498 export_starting_index
,
6500 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_HPOS
],
6508 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6511 pR700AsmCode
->number_of_exports
= export_count
;
6513 unBit
= 1 << VERT_RESULT_COL0
;
6514 if(OutputsWritten
& unBit
)
6516 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6518 export_starting_index
,
6520 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL0
],
6526 export_starting_index
++;
6529 unBit
= 1 << VERT_RESULT_COL1
;
6530 if(OutputsWritten
& unBit
)
6532 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6534 export_starting_index
,
6536 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL1
],
6542 export_starting_index
++;
6545 unBit
= 1 << VERT_RESULT_FOGC
;
6546 if(OutputsWritten
& unBit
)
6548 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6550 export_starting_index
,
6552 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_FOGC
],
6558 export_starting_index
++;
6563 unBit
= 1 << (VERT_RESULT_TEX0
+ i
);
6564 if(OutputsWritten
& unBit
)
6566 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6568 export_starting_index
,
6570 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_TEX0
+ i
],
6576 export_starting_index
++;
6580 for(i
=VERT_RESULT_VAR0
; i
<VERT_RESULT_MAX
; i
++)
6583 if(OutputsWritten
& unBit
)
6585 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6587 export_starting_index
,
6589 pR700AsmCode
->ucVP_OutputMap
[i
],
6595 export_starting_index
++;
6599 // At least one param should be exported
6602 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6606 if( GL_FALSE
== Process_Export(pR700AsmCode
,
6610 pR700AsmCode
->starting_export_register_number
,
6616 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_0
;
6617 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_0
;
6618 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_0
;
6619 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_1
;
6620 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
6623 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
6628 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
)
6630 FREE(pR700AsmCode
->pucOutMask
);
6631 FREE(pR700AsmCode
->pInstDeps
);
6633 if(NULL
!= pR700AsmCode
->subs
)
6635 FREE(pR700AsmCode
->subs
);
6637 if(NULL
!= pR700AsmCode
->callers
)
6639 FREE(pR700AsmCode
->callers
);
6642 if(NULL
!= pR700AsmCode
->presubs
)
6644 FREE(pR700AsmCode
->presubs
);