2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35 #include "program/prog_parameter.h"
37 #include "radeon_debug.h"
38 #include "r600_context.h"
40 #include "r700_assembler.h"
41 #include "evergreen_sq.h"
43 #define USE_CF_FOR_CONTINUE_BREAK 1
44 #define USE_CF_FOR_POP_AFTER 1
/* Instruction table for the noise1 subroutine: 12 prog_instruction records
 * (BGNSUB, MOV x3, SGT, IF, MOV, RET, ENDIF, MOV, RET, ENDSUB).  Every record
 * is a positional initializer of struct prog_instruction, which is declared
 * outside this view, so the numeric fields are left exactly as found.
 * NOTE(review): the closing of the array initializer is not visible here. */
46 struct prog_instruction noise1_insts
[12] = {
47 {OPCODE_BGNSUB
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
48 {OPCODE_MOV
, {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
49 {OPCODE_MOV
, {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
50 {OPCODE_MOV
, {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
51 {OPCODE_SGT
, {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
52 {OPCODE_IF
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0},
53 {OPCODE_MOV
, {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
54 {OPCODE_RET
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
55 {OPCODE_ENDIF
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
56 {OPCODE_MOV
, {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
57 {OPCODE_RET
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
58 {OPCODE_ENDSUB
, {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
/* Float constants for noise1, declared as 2 rows of 4 components.
 * NOTE(review): only the first row is visible in this view; the second row
 * and the end of the initializer need to be checked against the full file. */
60 float noise1_const
[2][4] = {
61 {0.300000f
, 0.900000f
, 0.500000f
, 0.300000f
}
/* COMPILED_SUB descriptor for noise1.
 * NOTE(review): the initializer fields are not visible in this view; they
 * presumably tie together noise1_insts and noise1_const -- confirm. */
64 COMPILED_SUB noise1_presub
= {
79 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
)
81 return pPVSDST
->addrmode0
| ((BITS
)pPVSDST
->addrmode1
<< 1);
84 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
)
86 pPVSDST
->addrmode0
= addrmode
& 1;
87 pPVSDST
->addrmode1
= (addrmode
>> 1) & 1;
90 void nomask_PVSDST(PVSDST
* pPVSDST
)
92 pPVSDST
->writex
= pPVSDST
->writey
= pPVSDST
->writez
= pPVSDST
->writew
= 1;
95 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
)
97 return pPVSSRC
->addrmode0
| ((BITS
)pPVSSRC
->addrmode1
<< 1);
100 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
)
102 pPVSSRC
->addrmode0
= addrmode
& 1;
103 pPVSSRC
->addrmode1
= (addrmode
>> 1) & 1;
/* Store the selector swz into the swizzle of a source operand.
 * NOTE(review): only the assignment to swizzlew is visible in this view; the
 * embedded numbering jumps from 107 to 112, so the lines in between
 * (presumably the x/y/z selectors) must be checked against the full file. */
107 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
)
112 pPVSSRC
->swizzlew
= swz
;
115 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
)
117 pPVSSRC
->swizzlex
= SQ_SEL_X
;
118 pPVSSRC
->swizzley
= SQ_SEL_Y
;
119 pPVSSRC
->swizzlez
= SQ_SEL_Z
;
120 pPVSSRC
->swizzlew
= SQ_SEL_W
;
/* Apply a second swizzle (x, y, z, w) on top of the operand's current one.
 * In each case group below, a selector naming X/Y/Z/W is replaced by the
 * operand's current selector for that component; the four results are then
 * written back to swizzlex..swizzlew.
 * NOTE(review): the return type, the switch headers that pick the argument
 * each group inspects, and the break/default lines are not visible here. */
124 swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
)
/* remap x through the current swizzle */
128 case SQ_SEL_X
: x
= pPVSSRC
->swizzlex
;
130 case SQ_SEL_Y
: x
= pPVSSRC
->swizzley
;
132 case SQ_SEL_Z
: x
= pPVSSRC
->swizzlez
;
134 case SQ_SEL_W
: x
= pPVSSRC
->swizzlew
;
/* remap y through the current swizzle */
141 case SQ_SEL_X
: y
= pPVSSRC
->swizzlex
;
143 case SQ_SEL_Y
: y
= pPVSSRC
->swizzley
;
145 case SQ_SEL_Z
: y
= pPVSSRC
->swizzlez
;
147 case SQ_SEL_W
: y
= pPVSSRC
->swizzlew
;
/* remap z through the current swizzle */
154 case SQ_SEL_X
: z
= pPVSSRC
->swizzlex
;
156 case SQ_SEL_Y
: z
= pPVSSRC
->swizzley
;
158 case SQ_SEL_Z
: z
= pPVSSRC
->swizzlez
;
160 case SQ_SEL_W
: z
= pPVSSRC
->swizzlew
;
/* remap w through the current swizzle */
167 case SQ_SEL_X
: w
= pPVSSRC
->swizzlex
;
169 case SQ_SEL_Y
: w
= pPVSSRC
->swizzley
;
171 case SQ_SEL_Z
: w
= pPVSSRC
->swizzlez
;
173 case SQ_SEL_W
: w
= pPVSSRC
->swizzlew
;
/* commit the composed selectors; done last so the lookups above all read
 * the operand's original swizzle */
178 pPVSSRC
->swizzlex
= x
;
179 pPVSSRC
->swizzley
= y
;
180 pPVSSRC
->swizzlez
= z
;
181 pPVSSRC
->swizzlew
= w
;
/* NOTE(review): only the signature is visible in this view.  Judging by the
 * name and by flipneg_PVSSRC, this presumably sets the negate flags of all
 * four components -- confirm against the full file. */
184 void neg_PVSSRC(PVSSRC
* pPVSSRC
)
/* NOTE(review): only the signature is visible in this view.  Judging by the
 * name, this presumably clears the negate flags of all four components --
 * confirm against the full file. */
192 void noneg_PVSSRC(PVSSRC
* pPVSSRC
)
200 // negate argument (for SUB instead of ADD and alike)
201 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
)
203 pPVSSRC
->negx
= !pPVSSRC
->negx
;
204 pPVSSRC
->negy
= !pPVSSRC
->negy
;
205 pPVSSRC
->negz
= !pPVSSRC
->negz
;
206 pPVSSRC
->negw
= !pPVSSRC
->negw
;
/* Force one component of a source operand to the constant selector SQ_SEL_0
 * and clear that component's negate flag.  The case labels map 0, 1, 2, 3 to
 * the x, y, z, w components.
 * NOTE(review): the switch header is not visible here; it presumably
 * switches on c -- confirm, along with what happens for other values. */
209 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
213 case 0: pPVSSRC
->swizzlex
= SQ_SEL_0
; pPVSSRC
->negx
= 0; break;
214 case 1: pPVSSRC
->swizzley
= SQ_SEL_0
; pPVSSRC
->negy
= 0; break;
215 case 2: pPVSSRC
->swizzlez
= SQ_SEL_0
; pPVSSRC
->negz
= 0; break;
216 case 3: pPVSSRC
->swizzlew
= SQ_SEL_0
; pPVSSRC
->negw
= 0; break;
/* Force one component of a source operand to the constant selector SQ_SEL_1
 * and clear that component's negate flag.  The case labels map 0, 1, 2, 3 to
 * the x, y, z, w components.
 * NOTE(review): the switch header is not visible here; it presumably
 * switches on c -- confirm, along with what happens for other values. */
221 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
)
225 case 0: pPVSSRC
->swizzlex
= SQ_SEL_1
; pPVSSRC
->negx
= 0; break;
226 case 1: pPVSSRC
->swizzley
= SQ_SEL_1
; pPVSSRC
->negy
= 0; break;
227 case 2: pPVSSRC
->swizzlez
= SQ_SEL_1
; pPVSSRC
->negz
= 0; break;
228 case 3: pPVSSRC
->swizzlew
= SQ_SEL_1
; pPVSSRC
->negw
= 0; break;
233 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
)
235 return (pOutVTXFmt0
->point_size
|
236 pOutVTXFmt0
->edge_flag
|
237 pOutVTXFmt0
->rta_index
|
238 pOutVTXFmt0
->kill_flag
|
239 pOutVTXFmt0
->viewport_index
);
/* Report whether a fragment output format exports a depth-related component:
 * the visible operands OR together depth, stencil_ref and coverage_to_mask.
 * NOTE(review): the embedded numbering skips a line between stencil_ref and
 * coverage_to_mask, so one more operand may be part of this expression --
 * confirm against the full file. */
242 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
)
244 return (pFPOutFmt
->depth
|
245 pFPOutFmt
->stencil_ref
|
247 pFPOutFmt
->coverage_to_mask
);
/* Test whether an ALU instruction uses an R6xx/R7xx reduction opcode: the
 * instruction must not be op3 (dst.op3 == 0) and its opcode must be one of
 * SQ_OP2_INST_DOT4, SQ_OP2_INST_DOT4_IEEE or SQ_OP2_INST_CUBE.
 * NOTE(review): the return statements are not visible in this view;
 * presumably GL_TRUE for a match and GL_FALSE otherwise -- confirm. */
250 GLboolean
is_reduction_opcode(PVSDWORD
* dest
)
252 if (dest
->dst
.op3
== 0)
254 if ( (dest
->dst
.opcode
== SQ_OP2_INST_DOT4
|| dest
->dst
.opcode
== SQ_OP2_INST_DOT4_IEEE
|| dest
->dst
.opcode
== SQ_OP2_INST_CUBE
) )
/* Evergreen counterpart of is_reduction_opcode: the instruction must not be
 * op3 (dst.op3 == 0) and its opcode must be one of EG_OP2_INST_DOT4,
 * EG_OP2_INST_DOT4_IEEE or EG_OP2_INST_CUBE.
 * NOTE(review): the return statements are not visible in this view;
 * presumably GL_TRUE for a match and GL_FALSE otherwise -- confirm. */
262 GLboolean
EG_is_reduction_opcode(PVSDWORD
* dest
)
264 if (dest
->dst
.op3
== 0)
266 if ( (dest
->dst
.opcode
== EG_OP2_INST_DOT4
|| dest
->dst
.opcode
== EG_OP2_INST_DOT4_IEEE
|| dest
->dst
.opcode
== EG_OP2_INST_CUBE
) )
/* Map a GL element type and channel count to a hardware FMT_* surface
 * format.  format starts as FMT_INVALID and uiElemSize as 0; when
 * pClient_size is non-NULL it receives uiElemSize * nChannels.
 * The visible assignments fall into five groups of four formats each:
 * 8-bit, 16-bit, 32-bit integer, and two 32-bit float groups.
 * NOTE(review): most case labels, the per-channel-count switches, the
 * uiElemSize assignments and the return statement are not visible in this
 * view (presumably format is returned) -- confirm against the full file. */
274 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
)
276 GLuint format
= FMT_INVALID
;
277 GLuint uiElemSize
= 0;
/* 8-bit element types */
282 case GL_UNSIGNED_BYTE
:
287 format
= FMT_8
; break;
289 format
= FMT_8_8
; break;
291 /* for some (small/unaligned) strides using 4 comps works
292 * better, probably same as GL_SHORT below
293 * test piglit/draw-vertices */
294 format
= FMT_8_8_8_8
; break;
296 format
= FMT_8_8_8_8
; break;
/* 16-bit element types */
302 case GL_UNSIGNED_SHORT
:
308 format
= FMT_16
; break;
310 format
= FMT_16_16
; break;
312 /* 3 comp GL_SHORT vertex format doesn't work on r700
313 4 somehow works, test - sauerbraten */
314 format
= FMT_16_16_16_16
; break;
316 format
= FMT_16_16_16_16
; break;
/* 32-bit integer element types */
322 case GL_UNSIGNED_INT
:
328 format
= FMT_32
; break;
330 format
= FMT_32_32
; break;
332 format
= FMT_32_32_32
; break;
334 format
= FMT_32_32_32_32
; break;
/* first 32-bit float group (its case label is not visible here) */
345 format
= FMT_32_FLOAT
; break;
347 format
= FMT_32_32_FLOAT
; break;
349 format
= FMT_32_32_32_FLOAT
; break;
351 format
= FMT_32_32_32_32_FLOAT
; break;
/* second 32-bit float group (its case label is not visible here) */
361 format
= FMT_32_FLOAT
; break;
363 format
= FMT_32_32_FLOAT
; break;
365 format
= FMT_32_32_32_FLOAT
; break;
367 format
= FMT_32_32_32_32_FLOAT
; break;
374 //GL_ASSERT_NO_CASE();
/* the client size is optional: only reported when the caller asked for it */
377 if(NULL
!= pClient_size
)
379 *pClient_size
= uiElemSize
* nChannels
;
/* Look up the number of source operands of an R6xx/R7xx ALU opcode.
 * The case labels form two groups: ADD .. CUBE and MOV .. COS.  An opcode in
 * neither group is reported through radeon_error.
 * NOTE(review): the switch header, the handling of nIsOp3 and the return
 * values are not visible in this view; the groups presumably return 2 and 1
 * respectively -- confirm against the full file. */
385 unsigned int r700GetNumOperands(GLuint opcode
, GLuint nIsOp3
)
/* first group of opcodes */
394 case SQ_OP2_INST_ADD
:
395 case SQ_OP2_INST_KILLE
:
396 case SQ_OP2_INST_KILLGT
:
397 case SQ_OP2_INST_KILLGE
:
398 case SQ_OP2_INST_KILLNE
:
399 case SQ_OP2_INST_MUL
:
400 case SQ_OP2_INST_MAX
:
401 case SQ_OP2_INST_MIN
:
402 //case SQ_OP2_INST_MAX_DX10:
403 //case SQ_OP2_INST_MIN_DX10:
404 case SQ_OP2_INST_SETE
:
405 case SQ_OP2_INST_SETNE
:
406 case SQ_OP2_INST_SETGT
:
407 case SQ_OP2_INST_SETGE
:
408 case SQ_OP2_INST_PRED_SETE
:
409 case SQ_OP2_INST_PRED_SETGT
:
410 case SQ_OP2_INST_PRED_SETGE
:
411 case SQ_OP2_INST_PRED_SETNE
:
412 case SQ_OP2_INST_DOT4
:
413 case SQ_OP2_INST_DOT4_IEEE
:
414 case SQ_OP2_INST_CUBE
:
/* second group of opcodes */
417 case SQ_OP2_INST_MOV
:
418 case SQ_OP2_INST_MOVA_FLOOR
:
419 case SQ_OP2_INST_FRACT
:
420 case SQ_OP2_INST_FLOOR
:
421 case SQ_OP2_INST_TRUNC
:
422 case SQ_OP2_INST_EXP_IEEE
:
423 case SQ_OP2_INST_LOG_CLAMPED
:
424 case SQ_OP2_INST_LOG_IEEE
:
425 case SQ_OP2_INST_RECIP_IEEE
:
426 case SQ_OP2_INST_RECIPSQRT_IEEE
:
427 case SQ_OP2_INST_FLT_TO_INT
:
428 case SQ_OP2_INST_SIN
:
429 case SQ_OP2_INST_COS
:
/* any other opcode is reported as missing from this table */
432 default: radeon_error(
433 "Need instruction operand number for %x.\n", opcode
);
/* Look up the number of source operands of an Evergreen ALU opcode.
 * The case labels form two groups: ADD .. CUBE and MOV .. COS (MOVA_FLOOR is
 * commented out here).  An opcode in neither group is reported through
 * radeon_error.
 * NOTE(review): the switch header, the handling of nIsOp3 and the return
 * values are not visible in this view; the groups presumably return 2 and 1
 * respectively -- confirm against the full file. */
439 unsigned int EG_GetNumOperands(GLuint opcode
, GLuint nIsOp3
)
/* first group of opcodes */
448 case EG_OP2_INST_ADD
:
449 case EG_OP2_INST_KILLE
:
450 case EG_OP2_INST_KILLGT
:
451 case EG_OP2_INST_KILLGE
:
452 case EG_OP2_INST_KILLNE
:
453 case EG_OP2_INST_MUL
:
454 case EG_OP2_INST_MAX
:
455 case EG_OP2_INST_MIN
:
456 //case EG_OP2_INST_MAX_DX10:
457 //case EG_OP2_INST_MIN_DX10:
458 case EG_OP2_INST_SETE
:
459 case EG_OP2_INST_SETNE
:
460 case EG_OP2_INST_SETGT
:
461 case EG_OP2_INST_SETGE
:
462 case EG_OP2_INST_PRED_SETE
:
463 case EG_OP2_INST_PRED_SETGT
:
464 case EG_OP2_INST_PRED_SETGE
:
465 case EG_OP2_INST_PRED_SETNE
:
466 case EG_OP2_INST_DOT4
:
467 case EG_OP2_INST_DOT4_IEEE
:
468 case EG_OP2_INST_CUBE
:
/* second group of opcodes */
471 case EG_OP2_INST_MOV
:
472 //case SQ_OP2_INST_MOVA_FLOOR:
473 case EG_OP2_INST_FRACT
:
474 case EG_OP2_INST_FLOOR
:
475 case EG_OP2_INST_TRUNC
:
476 case EG_OP2_INST_EXP_IEEE
:
477 case EG_OP2_INST_LOG_CLAMPED
:
478 case EG_OP2_INST_LOG_IEEE
:
479 case EG_OP2_INST_RECIP_IEEE
:
480 case EG_OP2_INST_RECIPSQRT_IEEE
:
481 case EG_OP2_INST_FLT_TO_INT
:
482 case EG_OP2_INST_SIN
:
483 case EG_OP2_INST_COS
:
/* any other opcode is reported as missing from this table */
486 default: radeon_error(
487 "Need instruction operand number for %x.\n", opcode
);
/* Reset an assembler state object before assembling a shader of type spt.
 * Initializes pShader via Init_R700_Shader, attaches it to pAsm, and clears
 * the clause pointers, export bookkeeping, register numbering, input-usage
 * flags, vertex-fetch slots, subroutine/caller arrays, call stack entry 0 and
 * pre-compiled subroutine bookkeeping.
 * NOTE(review): the embedded numbering shows gaps (e.g. 517-524, 538-546),
 * so further initializations and the return statement are not visible in
 * this view. */
493 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
)
497 Init_R700_Shader(pShader
);
498 pAsm
->pR700Shader
= pShader
;
499 pAsm
->currentShaderType
= spt
;
/* no clause of any kind is open yet */
501 pAsm
->cf_last_export_ptr
= NULL
;
503 pAsm
->cf_current_export_clause_ptr
= NULL
;
504 pAsm
->cf_current_alu_clause_ptr
= NULL
;
505 pAsm
->cf_current_tex_clause_ptr
= NULL
;
506 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
507 pAsm
->cf_current_cf_clause_ptr
= NULL
;
509 // No clause has been created yet
510 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
/* export counters */
512 pAsm
->number_of_colorandz_exports
= 0;
513 pAsm
->number_of_exports
= 0;
514 pAsm
->number_of_export_opcodes
= 0;
516 pAsm
->alu_x_opcode
= 0;
525 pAsm
->uLastPosUpdate
= 0;
/* zero the whole fragment output format word through a BITS alias */
527 *(BITS
*) &pAsm
->fp_stOutFmt0
= 0;
531 pAsm
->number_used_registers
= 0;
/* NOTE(review): the meaning of the starting value 256 is not visible here */
532 pAsm
->uUsedConsts
= 256;
536 pAsm
->uBoolConsts
= 0;
537 pAsm
->uIntConsts
= 0;
542 pAsm
->fc_stack
[0].type
= FC_NONE
;
547 pAsm
->aArgSubst
[3] = (-1);
/* -1 marks an export register that has not been assigned */
551 for (i
=0; i
<NUMBER_OF_OUTPUT_COLORS
; i
++)
553 pAsm
->color_export_register_number
[i
] = (-1);
557 pAsm
->depth_export_register_number
= (-1);
558 pAsm
->stencil_export_register_number
= (-1);
559 pAsm
->coverage_to_mask_export_register_number
= (-1);
560 pAsm
->mask_export_register_number
= (-1);
562 pAsm
->starting_export_register_number
= 0;
563 pAsm
->starting_vfetch_register_number
= 0;
564 pAsm
->starting_temp_register_number
= 0;
565 pAsm
->uFirstHelpReg
= 0;
/* no shader input has been seen yet */
567 pAsm
->input_position_is_used
= GL_FALSE
;
568 pAsm
->input_normal_is_used
= GL_FALSE
;
570 for (i
=0; i
<NUMBER_OF_INPUT_COLORS
; i
++)
572 pAsm
->input_color_is_used
[ i
] = GL_FALSE
;
575 for (i
=0; i
<NUMBER_OF_TEXTURE_UNITS
; i
++)
577 pAsm
->input_texture_unit_is_used
[ i
] = GL_FALSE
;
/* no vertex fetch instruction has been assembled for any attribute */
580 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
582 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
585 pAsm
->number_of_inputs
= 0;
587 pAsm
->is_tex
= GL_FALSE
;
588 pAsm
->need_tex_barrier
= GL_FALSE
;
/* subroutine and caller arrays start empty */
591 pAsm
->unSubArraySize
= 0;
592 pAsm
->unSubArrayPointer
= 0;
593 pAsm
->callers
= NULL
;
594 pAsm
->unCallerArraySize
= 0;
595 pAsm
->unCallerArrayPointer
= 0;
/* call stack entry 0 uses the shader's own CF instruction list, which is
 * also made the active list */
598 pAsm
->CALLSTACK
[0].FCSP_BeforeEntry
= 0;
599 pAsm
->CALLSTACK
[0].plstCFInstructions_local
600 = &(pAsm
->pR700Shader
->lstCFInstructions
);
602 pAsm
->CALLSTACK
[0].max
= 0;
603 pAsm
->CALLSTACK
[0].current
= 0;
605 SetActiveCFlist(pAsm
->pR700Shader
, pAsm
->CALLSTACK
[0].plstCFInstructions_local
);
/* pre-compiled subroutine bookkeeping */
609 pAsm
->presubs
= NULL
;
610 pAsm
->unPresubArraySize
= 0;
611 pAsm
->unNumPresub
= 0;
612 pAsm
->unCurNumILInsts
= 0;
614 pAsm
->unVetTexBits
= 0;
/* Test whether Opcode is one of the opcodes this assembler treats as a
 * texture instruction: TEX, TXP, TXB, DDX, DDY or TXL.
 * NOTE(review): the return statements are not visible in this view;
 * presumably GL_TRUE for a match and GL_FALSE otherwise -- confirm. */
619 GLboolean
IsTex(gl_inst_opcode Opcode
)
621 if( (OPCODE_TEX
==Opcode
) || (OPCODE_TXP
==Opcode
) || (OPCODE_TXB
==Opcode
) ||
622 (OPCODE_DDX
==Opcode
) || (OPCODE_DDY
==Opcode
) || (OPCODE_TXL
==Opcode
) )
/* NOTE(review): only the signature and a TODO are visible in this view, so
 * the classification rule cannot be documented from here.  The TODO says
 * flow-control and export opcodes still need handling. */
629 GLboolean
IsAlu(gl_inst_opcode Opcode
)
631 //TODO : more for fc and ex for higher spec.
/* Make new_clause_type the assembler's current clause type.
 * When the type changes, the first switch closes the previously open clause
 * by resetting its clause pointer to NULL, and the second switch opens the
 * new one.  For CF_EXPORT_CLAUSE a new R700ControlFlowSXClause is allocated,
 * initialized and appended to the shader's CF instructions; for the other
 * visible types only cf_current_clause_type is updated.
 * NOTE(review): several case labels (presumably the ALU, VTX and TEX
 * clauses), the error-reporting calls and the return statements are not
 * visible in this view; callers compare the result against GL_FALSE. */
639 int check_current_clause(r700_AssemblerBase
* pAsm
,
640 CF_CLAUSE_TYPE new_clause_type
)
642 if (pAsm
->cf_current_clause_type
!= new_clause_type
)
643 { //Close last open clause
644 switch (pAsm
->cf_current_clause_type
)
647 if ( pAsm
->cf_current_alu_clause_ptr
!= NULL
)
649 pAsm
->cf_current_alu_clause_ptr
= NULL
;
653 if ( pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
655 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
659 if ( pAsm
->cf_current_tex_clause_ptr
!= NULL
)
661 pAsm
->cf_current_tex_clause_ptr
= NULL
;
664 case CF_EXPORT_CLAUSE
:
665 if ( pAsm
->cf_current_export_clause_ptr
!= NULL
)
667 pAsm
->cf_current_export_clause_ptr
= NULL
;
670 case CF_OTHER_CLAUSE
:
671 if ( pAsm
->cf_current_cf_clause_ptr
!= NULL
)
673 pAsm
->cf_current_cf_clause_ptr
= NULL
;
676 case CF_EMPTY_CLAUSE
:
680 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
/* nothing is open between closing the old clause and opening the new one */
684 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
/* open the requested clause */
687 switch (new_clause_type
)
690 pAsm
->cf_current_clause_type
= CF_ALU_CLAUSE
;
693 pAsm
->cf_current_clause_type
= CF_VTX_CLAUSE
;
696 pAsm
->cf_current_clause_type
= CF_TEX_CLAUSE
;
698 case CF_EXPORT_CLAUSE
:
700 R700ControlFlowSXClause
* pR700ControlFlowSXClause
701 = (R700ControlFlowSXClause
*) CALLOC_STRUCT(R700ControlFlowSXClause
);
703 // Add new export instruction to control flow program
704 if (pR700ControlFlowSXClause
!= 0)
706 pAsm
->cf_current_export_clause_ptr
= pR700ControlFlowSXClause
;
707 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause
);
708 AddCFInstruction( pAsm
->pR700Shader
,
709 (R700ControlFlowInstruction
*)pR700ControlFlowSXClause
);
714 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
717 pAsm
->cf_current_clause_type
= CF_EXPORT_CLAUSE
;
720 case CF_EMPTY_CLAUSE
:
722 case CF_OTHER_CLAUSE
:
723 pAsm
->cf_current_clause_type
= CF_OTHER_CLAUSE
;
727 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type
);
735 GLboolean
add_cf_instruction(r700_AssemblerBase
* pAsm
)
737 if(GL_FALSE
== check_current_clause(pAsm
, CF_OTHER_CLAUSE
))
742 pAsm
->cf_current_cf_clause_ptr
=
743 (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
745 if (pAsm
->cf_current_cf_clause_ptr
!= NULL
)
747 Init_R700ControlFlowGenericClause(pAsm
->cf_current_cf_clause_ptr
);
748 AddCFInstruction( pAsm
->pR700Shader
,
749 (R700ControlFlowInstruction
*)pAsm
->cf_current_cf_clause_ptr
);
753 radeon_error("Could not allocate a new VFetch CF instruction.\n");
760 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
761 R700VertexInstruction
* vertex_instruction_ptr
)
763 if( GL_FALSE
== check_current_clause(pAsm
, CF_VTX_CLAUSE
) )
768 if( pAsm
->cf_current_vtx_clause_ptr
== NULL
||
769 ( (pAsm
->cf_current_vtx_clause_ptr
!= NULL
) &&
770 (pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_vtx_clause_ptr
->m_ShaderInstType
)-1)
773 // Create new Vfetch control flow instruction for this new clause
774 pAsm
->cf_current_vtx_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
776 if (pAsm
->cf_current_vtx_clause_ptr
!= NULL
)
778 Init_R700ControlFlowGenericClause(pAsm
->cf_current_vtx_clause_ptr
);
779 AddCFInstruction( pAsm
->pR700Shader
,
780 (R700ControlFlowInstruction
*)pAsm
->cf_current_vtx_clause_ptr
);
784 radeon_error("Could not allocate a new VFetch CF instruction.\n");
788 if(8 == pAsm
->unAsic
)
790 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, EG_CF_INST_VC
,
791 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
792 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, 0,
793 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
794 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, 0,
795 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
796 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, SQ_CF_COND_ACTIVE
,
797 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
798 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, 0,
799 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
800 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, 0,
801 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
802 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, 0,
803 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
804 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, 0,
805 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
806 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, 1,
807 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
811 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
812 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
813 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
814 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
= 0x0;
815 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
816 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
817 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_VTX
;
818 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
819 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
822 LinkVertexInstruction(pAsm
->cf_current_vtx_clause_ptr
, vertex_instruction_ptr
);
826 if(8 == pAsm
->unAsic
)
828 unsigned int count
= GETbits(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
,
829 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
) + 1;
830 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, count
,
831 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
835 pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.f
.count
++;
839 AddVTXInstruction(pAsm
->pR700Shader
, vertex_instruction_ptr
);
844 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
845 R700TextureInstruction
* tex_instruction_ptr
)
847 if ( GL_FALSE
== check_current_clause(pAsm
, CF_TEX_CLAUSE
) )
852 if ( pAsm
->cf_current_tex_clause_ptr
== NULL
||
853 ( (pAsm
->cf_current_tex_clause_ptr
!= NULL
) &&
854 (pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
>= GetCFMaxInstructions(pAsm
->cf_current_tex_clause_ptr
->m_ShaderInstType
)-1)
857 // new tex cf instruction for this new clause
858 pAsm
->cf_current_tex_clause_ptr
= (R700ControlFlowGenericClause
*) CALLOC_STRUCT(R700ControlFlowGenericClause
);
860 if (pAsm
->cf_current_tex_clause_ptr
!= NULL
)
862 Init_R700ControlFlowGenericClause(pAsm
->cf_current_tex_clause_ptr
);
863 AddCFInstruction( pAsm
->pR700Shader
,
864 (R700ControlFlowInstruction
*)pAsm
->cf_current_tex_clause_ptr
);
868 radeon_error("Could not allocate a new TEX CF instruction.\n");
872 if(8 == pAsm
->unAsic
)
874 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, EG_CF_INST_TC
,
875 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
876 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 0,
877 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
878 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 0,
879 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
880 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, SQ_CF_COND_ACTIVE
,
881 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
882 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 0,
883 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
884 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 0,
885 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
886 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 0,
887 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
888 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 0,
889 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
890 #ifdef FORCE_CF_TEX_BARRIER
891 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 1,
892 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
894 SETfield(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
, 0,
895 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
900 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.pop_count
= 0x0;
901 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
902 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
904 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
905 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
906 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_TEX
;
907 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
908 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x0; //0x1;
913 if(8 == pAsm
->unAsic
)
915 unsigned int count
= GETbits(pAsm
->cf_current_tex_clause_ptr
->m_Word1
.val
,
916 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
) + 1;
917 SETfield(pAsm
->cf_current_vtx_clause_ptr
->m_Word1
.val
, count
,
918 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
922 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.count
++;
926 // If this clause constains any TEX instruction that is dependent on a
927 // previous instruction, set the barrier bit, also always set for vert
928 // programs as tex deps are not(yet) computed for them
929 if( pAsm
->currentShaderType
== SPT_VP
|| pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) || pAsm
->need_tex_barrier
== GL_TRUE
)
931 pAsm
->cf_current_tex_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
934 if(NULL
== pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
)
936 pAsm
->cf_current_tex_clause_ptr
->m_pLinkedTEXInstruction
= tex_instruction_ptr
;
937 tex_instruction_ptr
->m_pLinkedGenericClause
= pAsm
->cf_current_tex_clause_ptr
;
940 AddTEXInstruction(pAsm
->pR700Shader
, tex_instruction_ptr
);
/* Build (or refresh) the R6xx/R7xx vertex fetch instruction for the vertex
 * buffer slot gl_client_id.  An instruction already stored in
 * pAsm->vfetch_instruction_ptr_array for that slot is reused; otherwise a new
 * R700VertexGenericFetch is allocated, initialized, filled in, passed to
 * add_vfetch_instruction and recorded in the array.
 * The fetch uses the constant-buffer fields (use_const_fields = 1), so
 * data_format from GetSurfaceFormat is computed but, in the visible lines,
 * not written into the instruction.
 * NOTE(review): the gl_client_id parameter, the data_format local, the
 * else branches and the return statements are not visible in this view. */
945 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
947 GLuint destination_register
,
948 GLuint number_of_elements
,
949 GLenum dataElementType
,
950 VTX_FETCH_METHOD
* pFetchMethod
)
952 GLuint client_size_inbyte
;
954 GLuint mega_fetch_count
;
955 GLuint is_mega_fetch_flag
;
957 R700VertexGenericFetch
* vfetch_instruction_ptr
;
958 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
= pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
];
/* allocate a fresh instruction only when this slot has none yet */
960 if (assembled_vfetch_instruction_ptr
== NULL
)
962 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
963 if (vfetch_instruction_ptr
== NULL
)
967 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
971 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
974 data_format
= GetSurfaceFormat(dataElementType
, number_of_elements
, &client_size_inbyte
);
/* mini fetch disables mega fetch; otherwise a full mega fetch is requested
 * and the bytes left over after this element are recorded */
976 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
979 mega_fetch_count
= 0;
980 is_mega_fetch_flag
= 0;
984 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
985 is_mega_fetch_flag
= 0x1;
986 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
989 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
990 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
991 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
993 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= gl_client_id
;
994 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
995 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
996 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
997 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
/* components beyond number_of_elements read as 0 (x, y, z) or 1 (w) */
999 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (number_of_elements
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
1000 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (number_of_elements
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
1001 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (number_of_elements
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
1002 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (number_of_elements
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
1004 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
1006 // Destination register
1007 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
1008 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
1010 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
1011 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
1013 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
/* only a newly allocated instruction is added to the shader and recorded */
1015 if (assembled_vfetch_instruction_ptr
== NULL
)
1017 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
1022 if (pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] != NULL
)
1028 pAsm
->vfetch_instruction_ptr_array
[ gl_client_id
] = vfetch_instruction_ptr
;
/* Build (or refresh) the Evergreen vertex fetch instruction for the vertex
 * buffer slot `element`.  An instruction already stored in
 * pAsm->vfetch_instruction_ptr_array for that slot is reused; otherwise a new
 * R700VertexGenericFetch is allocated, initialized, filled in, passed to
 * add_vfetch_instruction and recorded in the array.
 * Unlike the R6xx/R7xx variant, the instruction words are written through
 * SETfield/CLEARbit on the raw .val members, the "use const fields" bit is
 * written as 0 and data_format is written into the instruction.
 * NOTE(review): the parameters element, type, size and format used below,
 * the data_format local, the conditions selecting the signed/unsigned format
 * variants, the else branches and the return statements are not visible in
 * this view. */
1035 GLboolean
EG_assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
1036 GLuint destination_register
,
1041 GLboolean normalize
,
1043 VTX_FETCH_METHOD
* pFetchMethod
)
1045 GLuint client_size_inbyte
;
1047 GLuint mega_fetch_count
;
1048 GLuint is_mega_fetch_flag
;
1050 GLuint dst_sel_x
, dst_sel_y
, dst_sel_z
, dst_sel_w
;
1052 R700VertexGenericFetch
* vfetch_instruction_ptr
;
1053 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
1054 = pAsm
->vfetch_instruction_ptr_array
[element
];
/* allocate a fresh instruction only when this slot has none yet */
1056 if (assembled_vfetch_instruction_ptr
== NULL
)
1058 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
1059 if (vfetch_instruction_ptr
== NULL
)
1063 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
1067 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
1070 data_format
= GetSurfaceFormat(type
, size
, &client_size_inbyte
);
/* mini fetch disables mega fetch; otherwise a full mega fetch is requested
 * and the bytes left over after this element are recorded */
1072 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
1075 mega_fetch_count
= 0;
1076 is_mega_fetch_flag
= 0;
1080 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
1081 is_mega_fetch_flag
= 0x1;
1082 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
/* word 0: fetch opcode, fetch type, buffer id, source register/selector */
1085 SETfield(vfetch_instruction_ptr
->m_Word0
.val
, EG_VC_INST_FETCH
,
1086 EG_VTX_WORD0__VC_INST_shift
,
1087 EG_VTX_WORD0__VC_INST_mask
);
1088 SETfield(vfetch_instruction_ptr
->m_Word0
.val
, EG_VTX_FETCH_VERTEX_DATA
,
1089 EG_VTX_WORD0__FETCH_TYPE_shift
,
1090 EG_VTX_WORD0__FETCH_TYPE_mask
);
1091 CLEARbit(vfetch_instruction_ptr
->m_Word0
.val
,
1092 EG_VTX_WORD0__FWQ_bit
);
1093 SETfield(vfetch_instruction_ptr
->m_Word0
.val
, element
,
1094 EG_VTX_WORD0__BUFFER_ID_shift
,
1095 EG_VTX_WORD0__BUFFER_ID_mask
);
1096 SETfield(vfetch_instruction_ptr
->m_Word0
.val
, 0x0,
1097 EG_VTX_WORD0__SRC_GPR_shift
,
1098 EG_VTX_WORD0__SRC_GPR_mask
);
1099 SETfield(vfetch_instruction_ptr
->m_Word0
.val
, SQ_ABSOLUTE
,
1100 EG_VTX_WORD0__SRC_REL_shift
,
1101 EG_VTX_WORD0__SRC_REL_bit
);
1102 SETfield(vfetch_instruction_ptr
->m_Word0
.val
, SQ_SEL_X
,
1103 EG_VTX_WORD0__SRC_SEL_X_shift
,
1104 EG_VTX_WORD0__SRC_SEL_X_mask
);
1105 SETfield(vfetch_instruction_ptr
->m_Word0
.val
, mega_fetch_count
,
1106 EG_VTX_WORD0__MFC_shift
,
1107 EG_VTX_WORD0__MFC_mask
);
/* destination selectors: GL_BGRA swaps the x and z sources; components
 * beyond size read as 0 (x, y, z) or 1 (w) */
1109 if(format
== GL_BGRA
)
1111 dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_Z
;
1112 dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
1113 dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_X
;
1114 dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
1118 dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
1119 dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
1120 dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
1121 dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
1124 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, dst_sel_x
,
1125 EG_VTX_WORD1__DST_SEL_X_shift
,
1126 EG_VTX_WORD1__DST_SEL_X_mask
);
1127 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, dst_sel_y
,
1128 EG_VTX_WORD1__DST_SEL_Y_shift
,
1129 EG_VTX_WORD1__DST_SEL_Y_mask
);
1130 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, dst_sel_z
,
1131 EG_VTX_WORD1__DST_SEL_Z_shift
,
1132 EG_VTX_WORD1__DST_SEL_Z_mask
);
1133 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, dst_sel_w
,
1134 EG_VTX_WORD1__DST_SEL_W_shift
,
1135 EG_VTX_WORD1__DST_SEL_W_mask
);
1137 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, 0, /* use format here, in r6/r7, format used set in const, need to use same */
1138 EG_VTX_WORD1__UCF_shift
,
1139 EG_VTX_WORD1__UCF_bit
);
1140 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, data_format
,
1141 EG_VTX_WORD1__DATA_FORMAT_shift
,
1142 EG_VTX_WORD1__DATA_FORMAT_mask
);
/* format component signedness; NOTE(review): the preprocessor/if lines that
 * choose between the three variants below are not visible in this view */
1144 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, SQ_FORMAT_COMP_SIGNED
,
1145 EG_VTX_WORD1__FCA_shift
,
1146 EG_VTX_WORD1__FCA_bit
);
1150 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, SQ_FORMAT_COMP_SIGNED
,
1151 EG_VTX_WORD1__FCA_shift
,
1152 EG_VTX_WORD1__FCA_bit
);
1156 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, SQ_FORMAT_COMP_UNSIGNED
,
1157 EG_VTX_WORD1__FCA_shift
,
1158 EG_VTX_WORD1__FCA_bit
);
1160 #endif /* TEST_VFETCH */
/* numeric format: normalized when requested, scaled otherwise */
1162 if(GL_TRUE
== normalize
)
1164 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, SQ_NUM_FORMAT_NORM
,
1165 EG_VTX_WORD1__NFA_shift
,
1166 EG_VTX_WORD1__NFA_mask
);
1170 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, SQ_NUM_FORMAT_SCALED
,
1171 EG_VTX_WORD1__NFA_shift
,
1172 EG_VTX_WORD1__NFA_mask
);
1175 /* Destination register */
1176 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, destination_register
,
1177 EG_VTX_WORD1_GPR__DST_GPR_shift
,
1178 EG_VTX_WORD1_GPR__DST_GPR_mask
);
1179 SETfield(vfetch_instruction_ptr
->m_Word1
.val
, SQ_ABSOLUTE
,
1180 EG_VTX_WORD1_GPR__DST_REL_shift
,
1181 EG_VTX_WORD1_GPR__DST_REL_bit
);
/* word 2: offset, endian swap, stride flag and mega fetch flag */
1184 SETfield(vfetch_instruction_ptr
->m_Word2
.val
, 0,
1185 EG_VTX_WORD2__OFFSET_shift
,
1186 EG_VTX_WORD2__OFFSET_mask
);
1187 SETfield(vfetch_instruction_ptr
->m_Word2
.val
, SQ_ENDIAN_NONE
,
1188 EG_VTX_WORD2__ENDIAN_SWAP_shift
,
1189 EG_VTX_WORD2__ENDIAN_SWAP_mask
);
1190 SETfield(vfetch_instruction_ptr
->m_Word2
.val
, 0,
1191 EG_VTX_WORD2__CBNS_shift
,
1192 EG_VTX_WORD2__CBNS_bit
);
1193 SETfield(vfetch_instruction_ptr
->m_Word2
.val
, is_mega_fetch_flag
,
1194 EG_VTX_WORD2__MEGA_FETCH_shift
,
1195 EG_VTX_WORD2__MEGA_FETCH_mask
);
/* only a newly allocated instruction is added to the shader and recorded */
1197 if (assembled_vfetch_instruction_ptr
== NULL
)
1199 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
1204 if (pAsm
->vfetch_instruction_ptr_array
[element
] != NULL
)
1210 pAsm
->vfetch_instruction_ptr_array
[element
] = vfetch_instruction_ptr
;
1217 GLboolean
assemble_vfetch_instruction2(r700_AssemblerBase
* pAsm
,
1218 GLuint destination_register
,
1223 GLboolean normalize
,
1225 VTX_FETCH_METHOD
* pFetchMethod
)
1227 GLuint client_size_inbyte
;
1229 GLuint mega_fetch_count
;
1230 GLuint is_mega_fetch_flag
;
1232 R700VertexGenericFetch
* vfetch_instruction_ptr
;
1233 R700VertexGenericFetch
* assembled_vfetch_instruction_ptr
1234 = pAsm
->vfetch_instruction_ptr_array
[element
];
1236 if (assembled_vfetch_instruction_ptr
== NULL
)
1238 vfetch_instruction_ptr
= (R700VertexGenericFetch
*) CALLOC_STRUCT(R700VertexGenericFetch
);
1239 if (vfetch_instruction_ptr
== NULL
)
1243 Init_R700VertexGenericFetch(vfetch_instruction_ptr
);
1247 vfetch_instruction_ptr
= assembled_vfetch_instruction_ptr
;
1250 data_format
= GetSurfaceFormat(type
, size
, &client_size_inbyte
);
1252 if(GL_TRUE
== pFetchMethod
->bEnableMini
) //More conditions here
1255 mega_fetch_count
= 0;
1256 is_mega_fetch_flag
= 0;
1260 mega_fetch_count
= MEGA_FETCH_BYTES
- 1;
1261 is_mega_fetch_flag
= 0x1;
1262 pFetchMethod
->mega_fetch_remainder
= MEGA_FETCH_BYTES
- client_size_inbyte
;
1265 vfetch_instruction_ptr
->m_Word0
.f
.vtx_inst
= SQ_VTX_INST_FETCH
;
1266 vfetch_instruction_ptr
->m_Word0
.f
.fetch_type
= SQ_VTX_FETCH_VERTEX_DATA
;
1267 vfetch_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
1269 vfetch_instruction_ptr
->m_Word0
.f
.buffer_id
= element
;
1270 vfetch_instruction_ptr
->m_Word0
.f
.src_gpr
= 0x0;
1271 vfetch_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
1272 vfetch_instruction_ptr
->m_Word0
.f
.src_sel_x
= SQ_SEL_X
;
1273 vfetch_instruction_ptr
->m_Word0
.f
.mega_fetch_count
= mega_fetch_count
;
1275 if(format
== GL_BGRA
)
1277 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_Z
;
1278 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
1279 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_X
;
1280 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
1284 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (size
< 1) ? SQ_SEL_0
: SQ_SEL_X
;
1285 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (size
< 2) ? SQ_SEL_0
: SQ_SEL_Y
;
1286 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (size
< 3) ? SQ_SEL_0
: SQ_SEL_Z
;
1287 vfetch_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (size
< 4) ? SQ_SEL_1
: SQ_SEL_W
;
1291 vfetch_instruction_ptr
->m_Word1
.f
.use_const_fields
= 1;
1292 vfetch_instruction_ptr
->m_Word1
.f
.data_format
= data_format
;
1293 vfetch_instruction_ptr
->m_Word2
.f
.endian_swap
= SQ_ENDIAN_NONE
;
1297 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_SIGNED
;
1301 vfetch_instruction_ptr
->m_Word1
.f
.format_comp_all
= SQ_FORMAT_COMP_UNSIGNED
;
1304 if(GL_TRUE
== normalize
)
1306 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_NORM
;
1310 vfetch_instruction_ptr
->m_Word1
.f
.num_format_all
= SQ_NUM_FORMAT_INT
;
1313 // Destination register
1314 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_gpr
= destination_register
;
1315 vfetch_instruction_ptr
->m_Word1_GPR
.f
.dst_rel
= SQ_ABSOLUTE
;
1317 vfetch_instruction_ptr
->m_Word2
.f
.offset
= 0;
1318 vfetch_instruction_ptr
->m_Word2
.f
.const_buf_no_stride
= 0x0;
1320 vfetch_instruction_ptr
->m_Word2
.f
.mega_fetch
= is_mega_fetch_flag
;
1322 if (assembled_vfetch_instruction_ptr
== NULL
)
1324 if ( GL_FALSE
== add_vfetch_instruction(pAsm
, (R700VertexInstruction
*)vfetch_instruction_ptr
) )
1329 if (pAsm
->vfetch_instruction_ptr_array
[element
] != NULL
)
1335 pAsm
->vfetch_instruction_ptr_array
[element
] = vfetch_instruction_ptr
;
1342 GLboolean
cleanup_vfetch_instructions(r700_AssemblerBase
* pAsm
)
1345 pAsm
->cf_current_clause_type
= CF_EMPTY_CLAUSE
;
1346 pAsm
->cf_current_vtx_clause_ptr
= NULL
;
1348 for (i
=0; i
<VERT_ATTRIB_MAX
; i
++)
1350 pAsm
->vfetch_instruction_ptr_array
[ i
] = NULL
;
1353 cleanup_vfetch_shaderinst(pAsm
->pR700Shader
);
1358 GLuint
gethelpr(r700_AssemblerBase
* pAsm
)
1360 GLuint r
= pAsm
->uHelpReg
;
1362 if (pAsm
->uHelpReg
> pAsm
->number_used_registers
)
1364 pAsm
->number_used_registers
= pAsm
->uHelpReg
;
1368 void resethelpr(r700_AssemblerBase
* pAsm
)
1370 pAsm
->uHelpReg
= pAsm
->uFirstHelpReg
;
1373 void checkop_init(r700_AssemblerBase
* pAsm
)
1376 pAsm
->aArgSubst
[0] =
1377 pAsm
->aArgSubst
[1] =
1378 pAsm
->aArgSubst
[2] =
1379 pAsm
->aArgSubst
[3] = -1;
1382 static GLboolean
next_ins(r700_AssemblerBase
*pAsm
)
1384 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1386 if (GL_TRUE
== pAsm
->is_tex
)
1388 if (pILInst
->TexSrcTarget
== TEXTURE_RECT_INDEX
)
1390 if (GL_FALSE
== assemble_tex_instruction(pAsm
, GL_FALSE
))
1392 radeon_error("Error assembling TEX instruction\n");
1398 if (GL_FALSE
== assemble_tex_instruction(pAsm
, GL_TRUE
))
1400 radeon_error("Error assembling TEX instruction\n");
1407 if (GL_FALSE
== assemble_alu_instruction(pAsm
))
1409 radeon_error("Error assembling ALU instruction\n");
1414 if (pAsm
->D
.dst
.rtype
== DST_REG_OUT
)
1416 assert(pAsm
->D
.dst
.reg
>= pAsm
->starting_export_register_number
);
1419 //reset for next inst.
1422 pAsm
->S
[0].bits
= 0;
1423 pAsm
->S
[1].bits
= 0;
1424 pAsm
->S
[2].bits
= 0;
1425 pAsm
->is_tex
= GL_FALSE
;
1426 pAsm
->need_tex_barrier
= GL_FALSE
;
1428 pAsm
->C
[0].bits
= pAsm
->C
[1].bits
= pAsm
->C
[2].bits
= pAsm
->C
[3].bits
= 0;
1432 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
)
1434 GLuint tmp
= gethelpr(pAsm
);
1436 //mov src to temp helper gpr.
1437 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
1439 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1441 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1442 pAsm
->D
.dst
.reg
= tmp
;
1444 nomask_PVSDST(&(pAsm
->D
.dst
));
1446 if( GL_FALSE
== assemble_src(pAsm
, src
, 0) )
1451 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
1452 noneg_PVSSRC(&(pAsm
->S
[0].src
));
1454 if( GL_FALSE
== next_ins(pAsm
) )
1459 pAsm
->aArgSubst
[1 + src
] = tmp
;
1464 GLboolean
checkop1(r700_AssemblerBase
* pAsm
)
1470 GLboolean
checkop2(r700_AssemblerBase
* pAsm
)
1472 GLboolean bSrcConst
[2];
1473 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1477 if( (pILInst
->SrcReg
[0].File
== PROGRAM_UNIFORM
) ||
1478 (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
1479 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
1480 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
1481 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
1483 bSrcConst
[0] = GL_TRUE
;
1487 bSrcConst
[0] = GL_FALSE
;
1489 if( (pILInst
->SrcReg
[1].File
== PROGRAM_UNIFORM
) ||
1490 (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1491 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1492 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1493 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1495 bSrcConst
[1] = GL_TRUE
;
1499 bSrcConst
[1] = GL_FALSE
;
1502 if( (bSrcConst
[0] == GL_TRUE
) && (bSrcConst
[1] == GL_TRUE
) )
1504 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1506 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1516 GLboolean
checkop3(r700_AssemblerBase
* pAsm
)
1518 GLboolean bSrcConst
[3];
1519 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1523 if( (pILInst
->SrcReg
[0].File
== PROGRAM_UNIFORM
) ||
1524 (pILInst
->SrcReg
[0].File
== PROGRAM_CONSTANT
) ||
1525 (pILInst
->SrcReg
[0].File
== PROGRAM_LOCAL_PARAM
) ||
1526 (pILInst
->SrcReg
[0].File
== PROGRAM_ENV_PARAM
) ||
1527 (pILInst
->SrcReg
[0].File
== PROGRAM_STATE_VAR
) )
1529 bSrcConst
[0] = GL_TRUE
;
1533 bSrcConst
[0] = GL_FALSE
;
1535 if( (pILInst
->SrcReg
[1].File
== PROGRAM_UNIFORM
) ||
1536 (pILInst
->SrcReg
[1].File
== PROGRAM_CONSTANT
) ||
1537 (pILInst
->SrcReg
[1].File
== PROGRAM_LOCAL_PARAM
) ||
1538 (pILInst
->SrcReg
[1].File
== PROGRAM_ENV_PARAM
) ||
1539 (pILInst
->SrcReg
[1].File
== PROGRAM_STATE_VAR
) )
1541 bSrcConst
[1] = GL_TRUE
;
1545 bSrcConst
[1] = GL_FALSE
;
1547 if( (pILInst
->SrcReg
[2].File
== PROGRAM_UNIFORM
) ||
1548 (pILInst
->SrcReg
[2].File
== PROGRAM_CONSTANT
) ||
1549 (pILInst
->SrcReg
[2].File
== PROGRAM_LOCAL_PARAM
) ||
1550 (pILInst
->SrcReg
[2].File
== PROGRAM_ENV_PARAM
) ||
1551 (pILInst
->SrcReg
[2].File
== PROGRAM_STATE_VAR
) )
1553 bSrcConst
[2] = GL_TRUE
;
1557 bSrcConst
[2] = GL_FALSE
;
1560 if( (GL_TRUE
== bSrcConst
[0]) &&
1561 (GL_TRUE
== bSrcConst
[1]) &&
1562 (GL_TRUE
== bSrcConst
[2]) )
1564 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1568 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1575 else if( (GL_TRUE
== bSrcConst
[0]) &&
1576 (GL_TRUE
== bSrcConst
[1]) )
1578 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[1].Index
)
1580 if( GL_FALSE
== mov_temp(pAsm
, 1) )
1588 else if ( (GL_TRUE
== bSrcConst
[0]) &&
1589 (GL_TRUE
== bSrcConst
[2]) )
1591 if(pILInst
->SrcReg
[0].Index
!= pILInst
->SrcReg
[2].Index
)
1593 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1601 else if( (GL_TRUE
== bSrcConst
[1]) &&
1602 (GL_TRUE
== bSrcConst
[2]) )
1604 if(pILInst
->SrcReg
[1].Index
!= pILInst
->SrcReg
[2].Index
)
1606 if( GL_FALSE
== mov_temp(pAsm
, 2) )
1618 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
1622 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1629 if(pAsm
->aArgSubst
[1+src
] >= 0)
1632 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1633 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1634 pAsm
->S
[fld
].src
.reg
= pAsm
->aArgSubst
[1+src
];
1638 if (1 == pILInst
->SrcReg
[src
].RelAddr
)
1640 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_RELATIVE_A0
);
1644 setaddrmode_PVSSRC(&(pAsm
->S
[fld
].src
), ADDR_ABSOLUTE
);
1646 switch (pILInst
->SrcReg
[src
].File
)
1648 case PROGRAM_TEMPORARY
:
1649 pAsm
->S
[fld
].src
.rtype
= SRC_REG_TEMPORARY
;
1650 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
+ pAsm
->starting_temp_register_number
;
1652 case PROGRAM_CONSTANT
:
1653 case PROGRAM_LOCAL_PARAM
:
1654 case PROGRAM_ENV_PARAM
:
1655 case PROGRAM_STATE_VAR
:
1656 case PROGRAM_UNIFORM
:
1657 pAsm
->S
[fld
].src
.rtype
= SRC_REG_CONSTANT
;
1658 if(pILInst
->SrcReg
[src
].Index
< 0)
1660 WARN_ONCE("Negative register offsets not supported yet!\n");
1661 pAsm
->S
[fld
].src
.reg
= 0;
1665 pAsm
->S
[fld
].src
.reg
= pILInst
->SrcReg
[src
].Index
;
1669 pAsm
->S
[fld
].src
.rtype
= SRC_REG_GPR
;
1670 switch (pAsm
->currentShaderType
)
1673 pAsm
->S
[fld
].src
.reg
= pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1676 pAsm
->S
[fld
].src
.reg
= pAsm
->ucVP_AttributeMap
[pILInst
->SrcReg
[src
].Index
];
1680 case PROGRAM_OUTPUT
:
1681 pAsm
->S
[fld
].src
.rtype
= SRC_REG_GPR
;
1682 switch (pAsm
->currentShaderType
)
1685 pAsm
->S
[fld
].src
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->SrcReg
[src
].Index
];
1688 pAsm
->S
[fld
].src
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->SrcReg
[src
].Index
];
1693 radeon_error("Invalid source argument type : %d \n", pILInst
->SrcReg
[src
].File
);
1698 pAsm
->S
[fld
].src
.swizzlex
= pILInst
->SrcReg
[src
].Swizzle
& 0x7;
1699 pAsm
->S
[fld
].src
.swizzley
= (pILInst
->SrcReg
[src
].Swizzle
>> 3) & 0x7;
1700 pAsm
->S
[fld
].src
.swizzlez
= (pILInst
->SrcReg
[src
].Swizzle
>> 6) & 0x7;
1701 pAsm
->S
[fld
].src
.swizzlew
= (pILInst
->SrcReg
[src
].Swizzle
>> 9) & 0x7;
1703 pAsm
->S
[fld
].src
.negx
= pILInst
->SrcReg
[src
].Negate
& 0x1;
1704 pAsm
->S
[fld
].src
.negy
= (pILInst
->SrcReg
[src
].Negate
>> 1) & 0x1;
1705 pAsm
->S
[fld
].src
.negz
= (pILInst
->SrcReg
[src
].Negate
>> 2) & 0x1;
1706 pAsm
->S
[fld
].src
.negw
= (pILInst
->SrcReg
[src
].Negate
>> 3) & 0x1;
1711 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
)
1713 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1714 switch (pILInst
->DstReg
.File
)
1716 case PROGRAM_TEMPORARY
:
1717 if (1 == pILInst
->DstReg
.RelAddr
)
1719 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_RELATIVE_A0
);
1723 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1725 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1726 pAsm
->D
.dst
.reg
= pILInst
->DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1728 case PROGRAM_ADDRESS
:
1729 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1730 pAsm
->D
.dst
.rtype
= DST_REG_A0
;
1731 pAsm
->D
.dst
.reg
= 0;
1733 case PROGRAM_OUTPUT
:
1734 if (1 == pILInst
->DstReg
.RelAddr
)
1736 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_RELATIVE_A0
);
1740 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1742 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1743 switch (pAsm
->currentShaderType
)
1746 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1749 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1754 radeon_error("Invalid destination output argument type\n");
1758 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1759 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1760 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1761 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1763 if(pILInst
->SaturateMode
== SATURATE_ZERO_ONE
)
1765 pAsm
->D2
.dst2
.SaturateMode
= 1;
1769 pAsm
->D2
.dst2
.SaturateMode
= 0;
1775 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
)
1777 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1779 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
1781 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
1782 pAsm
->D
.dst
.reg
= pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.Index
+ pAsm
->starting_temp_register_number
;
1784 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1786 else if(PROGRAM_OUTPUT
== pILInst
->DstReg
.File
)
1788 pAsm
->D
.dst
.rtype
= DST_REG_OUT
;
1789 switch (pAsm
->currentShaderType
)
1792 pAsm
->D
.dst
.reg
= pAsm
->uiFP_OutputMap
[pILInst
->DstReg
.Index
];
1795 pAsm
->D
.dst
.reg
= pAsm
->ucVP_OutputMap
[pILInst
->DstReg
.Index
];
1799 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
1803 radeon_error("Invalid destination output argument type\n");
1807 pAsm
->D
.dst
.writex
= pILInst
->DstReg
.WriteMask
& 0x1;
1808 pAsm
->D
.dst
.writey
= (pILInst
->DstReg
.WriteMask
>> 1) & 0x1;
1809 pAsm
->D
.dst
.writez
= (pILInst
->DstReg
.WriteMask
>> 2) & 0x1;
1810 pAsm
->D
.dst
.writew
= (pILInst
->DstReg
.WriteMask
>> 3) & 0x1;
1815 GLboolean
tex_src(r700_AssemblerBase
*pAsm
)
1817 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
1819 GLboolean bValidTexCoord
= GL_FALSE
;
1821 if(pAsm
->aArgSubst
[1] >= 0)
1823 bValidTexCoord
= GL_TRUE
;
1824 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1825 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1826 pAsm
->S
[0].src
.reg
= pAsm
->aArgSubst
[1];
1830 switch (pILInst
->SrcReg
[0].File
) {
1831 case PROGRAM_UNIFORM
:
1832 case PROGRAM_CONSTANT
:
1833 case PROGRAM_LOCAL_PARAM
:
1834 case PROGRAM_ENV_PARAM
:
1835 case PROGRAM_STATE_VAR
:
1837 case PROGRAM_TEMPORARY
:
1838 bValidTexCoord
= GL_TRUE
;
1839 pAsm
->S
[0].src
.reg
= pILInst
->SrcReg
[0].Index
+
1840 pAsm
->starting_temp_register_number
;
1841 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
1844 if(SPT_VP
== pAsm
->currentShaderType
)
1846 switch (pILInst
->SrcReg
[0].Index
)
1848 case VERT_ATTRIB_TEX0
:
1849 case VERT_ATTRIB_TEX1
:
1850 case VERT_ATTRIB_TEX2
:
1851 case VERT_ATTRIB_TEX3
:
1852 case VERT_ATTRIB_TEX4
:
1853 case VERT_ATTRIB_TEX5
:
1854 case VERT_ATTRIB_TEX6
:
1855 case VERT_ATTRIB_TEX7
:
1856 bValidTexCoord
= GL_TRUE
;
1857 pAsm
->S
[0].src
.reg
=
1858 pAsm
->ucVP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1859 pAsm
->S
[0].src
.rtype
= SRC_REG_GPR
;
1865 switch (pILInst
->SrcReg
[0].Index
)
1867 case FRAG_ATTRIB_WPOS
:
1868 case FRAG_ATTRIB_COL0
:
1869 case FRAG_ATTRIB_COL1
:
1870 case FRAG_ATTRIB_FOGC
:
1871 case FRAG_ATTRIB_TEX0
:
1872 case FRAG_ATTRIB_TEX1
:
1873 case FRAG_ATTRIB_TEX2
:
1874 case FRAG_ATTRIB_TEX3
:
1875 case FRAG_ATTRIB_TEX4
:
1876 case FRAG_ATTRIB_TEX5
:
1877 case FRAG_ATTRIB_TEX6
:
1878 case FRAG_ATTRIB_TEX7
:
1879 bValidTexCoord
= GL_TRUE
;
1880 pAsm
->S
[0].src
.reg
=
1881 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1882 pAsm
->S
[0].src
.rtype
= SRC_REG_GPR
;
1884 case FRAG_ATTRIB_FACE
:
1885 fprintf(stderr
, "FRAG_ATTRIB_FACE unsupported\n");
1887 case FRAG_ATTRIB_PNTC
:
1888 fprintf(stderr
, "FRAG_ATTRIB_PNTC unsupported\n");
1892 if( (pILInst
->SrcReg
[0].Index
>= FRAG_ATTRIB_VAR0
) ||
1893 (pILInst
->SrcReg
[0].Index
< FRAG_ATTRIB_MAX
) )
1895 bValidTexCoord
= GL_TRUE
;
1896 pAsm
->S
[0].src
.reg
=
1897 pAsm
->uiFP_AttributeMap
[pILInst
->SrcReg
[0].Index
];
1898 pAsm
->S
[0].src
.rtype
= SRC_REG_GPR
;
1906 if(GL_TRUE
== bValidTexCoord
)
1908 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
1912 radeon_error("Invalid source texcoord for TEX instruction\n");
1916 pAsm
->S
[0].src
.swizzlex
= pILInst
->SrcReg
[0].Swizzle
& 0x7;
1917 pAsm
->S
[0].src
.swizzley
= (pILInst
->SrcReg
[0].Swizzle
>> 3) & 0x7;
1918 pAsm
->S
[0].src
.swizzlez
= (pILInst
->SrcReg
[0].Swizzle
>> 6) & 0x7;
1919 pAsm
->S
[0].src
.swizzlew
= (pILInst
->SrcReg
[0].Swizzle
>> 9) & 0x7;
1921 pAsm
->S
[0].src
.negx
= pILInst
->SrcReg
[0].Negate
& 0x1;
1922 pAsm
->S
[0].src
.negy
= (pILInst
->SrcReg
[0].Negate
>> 1) & 0x1;
1923 pAsm
->S
[0].src
.negz
= (pILInst
->SrcReg
[0].Negate
>> 2) & 0x1;
1924 pAsm
->S
[0].src
.negw
= (pILInst
->SrcReg
[0].Negate
>> 3) & 0x1;
1929 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
)
1931 PVSSRC
* texture_coordinate_source
;
1932 PVSSRC
* texture_unit_source
;
1934 R700TextureInstruction
* tex_instruction_ptr
= (R700TextureInstruction
*) CALLOC_STRUCT(R700TextureInstruction
);
1935 if (tex_instruction_ptr
== NULL
)
1939 Init_R700TextureInstruction(tex_instruction_ptr
);
1941 texture_coordinate_source
= &(pAsm
->S
[0].src
);
1942 texture_unit_source
= &(pAsm
->S
[1].src
);
1944 if(8 == pAsm
->unAsic
) /* evergreen */
1947 SETfield(tex_instruction_ptr
->m_Word0
.val
, pAsm
->D
.dst
.opcode
,
1948 EG_TEX_WORD0__TEX_INST_shift
,
1949 EG_TEX_WORD0__TEX_INST_mask
);
1951 if( (SQ_TEX_INST_GET_GRADIENTS_H
== pAsm
->D
.dst
.opcode
)
1952 ||(SQ_TEX_INST_GET_GRADIENTS_V
== pAsm
->D
.dst
.opcode
) )
1954 /* Use fine texel derivative calculation rather than use quad derivative */
1955 SETfield(tex_instruction_ptr
->m_Word0
.val
, 1,
1956 EG_TEX_WORD0__INST_MOD_shift
,
1957 EG_TEX_WORD0__INST_MOD_mask
);
1961 SETfield(tex_instruction_ptr
->m_Word0
.val
, 0,
1962 EG_TEX_WORD0__INST_MOD_shift
,
1963 EG_TEX_WORD0__INST_MOD_mask
);
1966 CLEARbit(tex_instruction_ptr
->m_Word0
.val
, EG_TEX_WORD0__FWQ_bit
);
1968 if(SPT_VP
== pAsm
->currentShaderType
)
1970 SETfield(tex_instruction_ptr
->m_Word0
.val
, (texture_unit_source
->reg
+ VERT_ATTRIB_MAX
),
1971 EG_TEX_WORD0__RESOURCE_ID_shift
,
1972 EG_TEX_WORD0__RESOURCE_ID_mask
);
1973 pAsm
->unVetTexBits
|= 1 << texture_unit_source
->reg
;
1977 SETfield(tex_instruction_ptr
->m_Word0
.val
, texture_unit_source
->reg
,
1978 EG_TEX_WORD0__RESOURCE_ID_shift
,
1979 EG_TEX_WORD0__RESOURCE_ID_mask
);
1982 CLEARbit(tex_instruction_ptr
->m_Word0
.val
, EG_TEX_WORD0__ALT_CONST_bit
);
1983 SETfield(tex_instruction_ptr
->m_Word0
.val
, 0,
1984 EG_TEX_WORD0__RIM_shift
,
1985 EG_TEX_WORD0__RIM_mask
);
1986 SETfield(tex_instruction_ptr
->m_Word0
.val
, 0,
1987 EG_TEX_WORD0__SIM_shift
,
1988 EG_TEX_WORD0__SIM_mask
);
1992 tex_instruction_ptr
->m_Word0
.f
.tex_inst
= pAsm
->D
.dst
.opcode
;
1993 tex_instruction_ptr
->m_Word0
.f
.bc_frac_mode
= 0x0;
1994 tex_instruction_ptr
->m_Word0
.f
.fetch_whole_quad
= 0x0;
1995 tex_instruction_ptr
->m_Word0
.f
.alt_const
= 0;
1997 if(SPT_VP
== pAsm
->currentShaderType
)
1999 tex_instruction_ptr
->m_Word0
.f
.resource_id
= texture_unit_source
->reg
+ VERT_ATTRIB_MAX
;
2000 pAsm
->unVetTexBits
|= 1 << texture_unit_source
->reg
;
2004 tex_instruction_ptr
->m_Word0
.f
.resource_id
= texture_unit_source
->reg
;
2008 tex_instruction_ptr
->m_Word1
.f
.lod_bias
= 0x0;
2010 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_NORMALIZED
;
2011 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_NORMALIZED
;
2012 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_NORMALIZED
;
2013 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_NORMALIZED
;
2015 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
2016 tex_instruction_ptr
->m_Word1
.f
.coord_type_x
= SQ_TEX_UNNORMALIZED
;
2017 tex_instruction_ptr
->m_Word1
.f
.coord_type_y
= SQ_TEX_UNNORMALIZED
;
2018 tex_instruction_ptr
->m_Word1
.f
.coord_type_z
= SQ_TEX_UNNORMALIZED
;
2019 tex_instruction_ptr
->m_Word1
.f
.coord_type_w
= SQ_TEX_UNNORMALIZED
;
2022 tex_instruction_ptr
->m_Word2
.f
.offset_x
= 0x0;
2023 tex_instruction_ptr
->m_Word2
.f
.offset_y
= 0x0;
2024 tex_instruction_ptr
->m_Word2
.f
.offset_z
= 0x0;
2025 tex_instruction_ptr
->m_Word2
.f
.sampler_id
= texture_unit_source
->reg
;
2028 if ( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
2029 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
2031 if(8 == pAsm
->unAsic
) /* evergreen */
2033 SETfield(tex_instruction_ptr
->m_Word0
.val
, texture_coordinate_source
->reg
,
2034 EG_TEX_WORD0__SRC_GPR_shift
,
2035 EG_TEX_WORD0__SRC_GPR_mask
);
2036 SETfield(tex_instruction_ptr
->m_Word0
.val
, SQ_ABSOLUTE
,
2037 EG_TEX_WORD0__SRC_REL_shift
,
2038 EG_TEX_WORD0__SRC_REL_bit
);
2042 tex_instruction_ptr
->m_Word0
.f
.src_gpr
= texture_coordinate_source
->reg
;
2043 tex_instruction_ptr
->m_Word0
.f
.src_rel
= SQ_ABSOLUTE
;
2046 tex_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
2047 tex_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
;
2049 tex_instruction_ptr
->m_Word1
.f
.dst_sel_x
= (pAsm
->D
.dst
.writex
? texture_unit_source
->swizzlex
: SQ_SEL_MASK
);
2050 tex_instruction_ptr
->m_Word1
.f
.dst_sel_y
= (pAsm
->D
.dst
.writey
? texture_unit_source
->swizzley
: SQ_SEL_MASK
);
2051 tex_instruction_ptr
->m_Word1
.f
.dst_sel_z
= (pAsm
->D
.dst
.writez
? texture_unit_source
->swizzlez
: SQ_SEL_MASK
);
2052 tex_instruction_ptr
->m_Word1
.f
.dst_sel_w
= (pAsm
->D
.dst
.writew
? texture_unit_source
->swizzlew
: SQ_SEL_MASK
);
2055 tex_instruction_ptr
->m_Word2
.f
.src_sel_x
= texture_coordinate_source
->swizzlex
;
2056 tex_instruction_ptr
->m_Word2
.f
.src_sel_y
= texture_coordinate_source
->swizzley
;
2057 tex_instruction_ptr
->m_Word2
.f
.src_sel_z
= texture_coordinate_source
->swizzlez
;
2058 tex_instruction_ptr
->m_Word2
.f
.src_sel_w
= texture_coordinate_source
->swizzlew
;
2062 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
2066 if( GL_FALSE
== add_tex_instruction(pAsm
, tex_instruction_ptr
) )
2074 void initialize(r700_AssemblerBase
*pAsm
)
2076 GLuint cycle
, component
;
2078 for (cycle
=0; cycle
<NUMBER_OF_CYCLES
; cycle
++)
2080 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
2082 pAsm
->hw_gpr
[cycle
][component
] = (-1);
2085 for (component
=0; component
<NUMBER_OF_COMPONENTS
; component
++)
2087 pAsm
->hw_cfile_addr
[component
] = (-1);
2088 pAsm
->hw_cfile_chan
[component
] = (-1);
2092 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
2095 BITS scalar_channel_index
,
2096 r700_AssemblerBase
*pAsm
)
2103 //--------------------------------------------------------------------------
2104 // Source for operands src0, src1.
2105 // Values [0,127] correspond to GPR[0..127].
2106 // Values [256,511] correspond to cfile constants c[0..255].
2108 //--------------------------------------------------------------------------
2109 // Other special values are shown in the list below.
2111 // 248 SQ_ALU_SRC_0: special constant 0.0.
2112 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
2114 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
2115 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
2117 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
2118 // 253 SQ_ALU_SRC_LITERAL: literal constant.
2120 // 254 SQ_ALU_SRC_PV: previous vector result.
2121 // 255 SQ_ALU_SRC_PS: previous scalar result.
2122 //--------------------------------------------------------------------------
2124 BITS channel_swizzle
;
2125 switch (scalar_channel_index
)
2127 case 0: channel_swizzle
= pSource
->swizzlex
; break;
2128 case 1: channel_swizzle
= pSource
->swizzley
; break;
2129 case 2: channel_swizzle
= pSource
->swizzlez
; break;
2130 case 3: channel_swizzle
= pSource
->swizzlew
; break;
2131 default: channel_swizzle
= SQ_SEL_MASK
; break;
2134 if(channel_swizzle
== SQ_SEL_0
)
2136 src_sel
= SQ_ALU_SRC_0
;
2138 else if (channel_swizzle
== SQ_SEL_1
)
2140 src_sel
= SQ_ALU_SRC_1
;
2144 if ( (pSource
->rtype
== SRC_REG_TEMPORARY
) ||
2145 (pSource
->rtype
== SRC_REG_GPR
)
2148 src_sel
= pSource
->reg
;
2150 else if (pSource
->rtype
== SRC_REG_CONSTANT
)
2152 /* TODO : 4 const buffers */
2153 if(GL_TRUE
== pAsm
->bUseMemConstant
)
2155 src_sel
= pSource
->reg
+ SQ_ALU_SRC_KCACHE0_BASE
;
2156 pAsm
->kcacheUsed
= SQ_ALU_SRC_KCACHE0_BASE
;
2160 src_sel
= pSource
->reg
+ CFILE_REGISTER_OFFSET
;
2163 else if (pSource
->rtype
== SRC_REC_LITERAL
)
2165 src_sel
= SQ_ALU_SRC_LITERAL
;
2169 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
2170 source_index
, pSource
->rtype
);
2175 if( ADDR_ABSOLUTE
== addrmode_PVSSRC(pSource
) )
2177 src_rel
= SQ_ABSOLUTE
;
2181 src_rel
= SQ_RELATIVE
;
2184 switch (channel_swizzle
)
2187 src_chan
= SQ_CHAN_X
;
2190 src_chan
= SQ_CHAN_Y
;
2193 src_chan
= SQ_CHAN_Z
;
2196 src_chan
= SQ_CHAN_W
;
2200 // Does not matter since src_sel controls
2201 src_chan
= SQ_CHAN_X
;
2204 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle
);
2209 switch (scalar_channel_index
)
2211 case 0: src_neg
= pSource
->negx
; break;
2212 case 1: src_neg
= pSource
->negy
; break;
2213 case 2: src_neg
= pSource
->negz
; break;
2214 case 3: src_neg
= pSource
->negw
; break;
2215 default: src_neg
= 0; break;
2218 switch (source_index
)
2221 assert(alu_instruction_ptr
);
2222 alu_instruction_ptr
->m_Word0
.f
.src0_sel
= src_sel
;
2223 alu_instruction_ptr
->m_Word0
.f
.src0_rel
= src_rel
;
2224 alu_instruction_ptr
->m_Word0
.f
.src0_chan
= src_chan
;
2225 alu_instruction_ptr
->m_Word0
.f
.src0_neg
= src_neg
;
2228 assert(alu_instruction_ptr
);
2229 alu_instruction_ptr
->m_Word0
.f
.src1_sel
= src_sel
;
2230 alu_instruction_ptr
->m_Word0
.f
.src1_rel
= src_rel
;
2231 alu_instruction_ptr
->m_Word0
.f
.src1_chan
= src_chan
;
2232 alu_instruction_ptr
->m_Word0
.f
.src1_neg
= src_neg
;
2235 assert(alu_instruction_ptr
);
2236 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
= src_sel
;
2237 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
= src_rel
;
2238 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
= src_chan
;
2239 alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
= src_neg
;
2242 radeon_error("Only three sources allowed in ALU opcodes.\n");
2250 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
2251 R700ALUInstruction
* alu_instruction_ptr
,
2252 GLuint contiguous_slots_needed
)
2254 if( GL_FALSE
== check_current_clause(pAsm
, CF_ALU_CLAUSE
) )
2259 if ( pAsm
->alu_x_opcode
!= 0 ||
2260 pAsm
->cf_current_alu_clause_ptr
== NULL
||
2261 ( (pAsm
->cf_current_alu_clause_ptr
!= NULL
) &&
2262 (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-contiguous_slots_needed
-1) )
2266 //new cf inst for this clause
2267 pAsm
->cf_current_alu_clause_ptr
= (R700ControlFlowALUClause
*) CALLOC_STRUCT(R700ControlFlowALUClause
);
2269 // link the new cf to cf segment
2270 if(NULL
!= pAsm
->cf_current_alu_clause_ptr
)
2272 Init_R700ControlFlowALUClause(pAsm
->cf_current_alu_clause_ptr
);
2273 AddCFInstruction( pAsm
->pR700Shader
,
2274 (R700ControlFlowInstruction
*)pAsm
->cf_current_alu_clause_ptr
);
2278 radeon_error("Could not allocate a new ALU CF instruction.\n");
2282 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank0
= 0x0;
2283 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank1
= 0x0;
2284 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_mode0
= SQ_CF_KCACHE_NOP
;
2286 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_mode1
= SQ_CF_KCACHE_NOP
;
2287 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr0
= 0x0;
2288 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr1
= 0x0;
2290 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
= 0x0;
2292 if(pAsm
->alu_x_opcode
!= 0)
2294 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= pAsm
->alu_x_opcode
;
2295 pAsm
->alu_x_opcode
= 0;
2299 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ALU
;
2302 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
2304 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
2308 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
+= (GetInstructionSize(alu_instruction_ptr
->m_ShaderInstType
) / 2);
2311 /* TODO : handle 4 bufs */
2312 if( (pAsm
->kcacheUsed
> 0) && (GL_TRUE
== pAsm
->bUseMemConstant
) )
2314 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank0
= 0x0;
2315 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_bank1
= 0x0;
2316 pAsm
->cf_current_alu_clause_ptr
->m_Word0
.f
.kcache_mode0
= SQ_CF_KCACHE_LOCK_2
;
2317 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_mode1
= SQ_CF_KCACHE_NOP
;
2318 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr0
= 0x0;
2319 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.kcache_addr1
= 0x0;
2322 // If this clause constains any instruction that is forward dependent on a TEX instruction,
2323 // set the whole_quad_mode for this clause
2324 if ( pAsm
->pInstDeps
[pAsm
->uiCurInst
].nDstDep
> (-1) )
2326 pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x1;
2329 if (pAsm
->cf_current_alu_clause_ptr
->m_Word1
.f
.count
>= (GetCFMaxInstructions(pAsm
->cf_current_alu_clause_ptr
->m_ShaderInstType
)-1) )
2331 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
2334 if(NULL
== pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
)
2336 pAsm
->cf_current_alu_clause_ptr
->m_pLinkedALUInstruction
= alu_instruction_ptr
;
2337 alu_instruction_ptr
->m_pLinkedALUClause
= pAsm
->cf_current_alu_clause_ptr
;
2340 AddALUInstruction(pAsm
->pR700Shader
, alu_instruction_ptr
);
2345 GLboolean
EG_add_ps_interp(r700_AssemblerBase
* pAsm
)
2347 R700ALUInstruction
* alu_instruction_ptr
= NULL
;
2351 unsigned int unWord0Temp
= 0x380C00;
2352 unsigned int unWord1Temp
= 0x146B10; //SQ_SEL_X
2356 for(ui
=(pAsm
->uIIns
-1); ui
>=0; ui
--)
2358 for(uj
=0; uj
<8; uj
++)
2360 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2361 Init_R700ALUInstruction(alu_instruction_ptr
);
2362 alu_instruction_ptr
->m_Word0
.val
= unWord0Temp
;
2363 alu_instruction_ptr
->m_Word1
.val
= unWord1Temp
;
2367 SETfield(alu_instruction_ptr
->m_Word1
.val
, EG_OP2_INST_INTERP_ZW
,
2368 EG_ALU_WORD1_OP2__ALU_INST_shift
, EG_ALU_WORD1_OP2__ALU_INST_mask
);
2372 SETfield(alu_instruction_ptr
->m_Word1
.val
, EG_OP2_INST_INTERP_XY
,
2373 EG_ALU_WORD1_OP2__ALU_INST_shift
, EG_ALU_WORD1_OP2__ALU_INST_mask
);
2375 if( (uj
> 1) && (uj
< 6) )
2377 SETfield(alu_instruction_ptr
->m_Word1
.val
, 1,
2378 EG_ALU_WORD1_OP2__WRITE_MASK_shift
, EG_ALU_WORD1_OP2__WRITE_MASK_bit
);
2382 SETfield(alu_instruction_ptr
->m_Word1
.val
, 0,
2383 EG_ALU_WORD1_OP2__WRITE_MASK_shift
, EG_ALU_WORD1_OP2__WRITE_MASK_bit
);
2385 if( (uj
> 1) && (uj
< 6) )
2387 SETfield(alu_instruction_ptr
->m_Word1
.val
, ui
,
2388 EG_ALU_WORD1__DST_GPR_shift
, EG_ALU_WORD1__DST_GPR_mask
);
2392 SETfield(alu_instruction_ptr
->m_Word1
.val
, 111,
2393 EG_ALU_WORD1__DST_GPR_shift
, EG_ALU_WORD1__DST_GPR_mask
);
2396 SETfield(alu_instruction_ptr
->m_Word1
.val
, (uj
% 4),
2397 EG_ALU_WORD1__DST_CHAN_shift
, EG_ALU_WORD1__DST_CHAN_mask
);
2398 SETfield(alu_instruction_ptr
->m_Word0
.val
, (1 - (uj
% 2)),
2399 EG_ALU_WORD0__SRC0_CHAN_shift
, EG_ALU_WORD0__SRC0_CHAN_mask
);
2400 SETfield(alu_instruction_ptr
->m_Word0
.val
, (EG_ALU_SRC_PARAM_BASE
+ ui
),
2401 EG_ALU_WORD0__SRC1_SEL_shift
, EG_ALU_WORD0__SRC1_SEL_mask
);
2404 SETfield(alu_instruction_ptr
->m_Word0
.val
, 1,
2405 EG_ALU_WORD0__LAST_shift
, EG_ALU_WORD0__LAST_bit
);
2408 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, 4) )
2419 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
2426 switch (source_index
)
2429 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src0_sel
;
2430 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src0_rel
;
2431 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src0_chan
;
2432 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src0_neg
;
2436 *psrc_sel
= alu_instruction_ptr
->m_Word0
.f
.src1_sel
;
2437 *psrc_rel
= alu_instruction_ptr
->m_Word0
.f
.src1_rel
;
2438 *psrc_chan
= alu_instruction_ptr
->m_Word0
.f
.src1_chan
;
2439 *psrc_neg
= alu_instruction_ptr
->m_Word0
.f
.src1_neg
;
2443 *psrc_sel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_sel
;
2444 *psrc_rel
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_rel
;
2445 *psrc_chan
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_chan
;
2446 *psrc_neg
= alu_instruction_ptr
->m_Word1_OP3
.f
.src2_neg
;
2451 int is_cfile(BITS sel
)
2453 if (sel
> 255 && sel
< 512)
2460 int is_const(BITS sel
)
2466 else if(sel
>= SQ_ALU_SRC_0
&& sel
<= SQ_ALU_SRC_LITERAL
)
2473 int is_gpr(BITS sel
)
2475 if (sel
>= 0 && sel
< 128)
2482 const GLuint BANK_SWIZZLE_VEC
[8] = {SQ_ALU_VEC_210
, //000
2483 SQ_ALU_VEC_120
, //001
2484 SQ_ALU_VEC_102
, //010
2486 SQ_ALU_VEC_201
, //011
2487 SQ_ALU_VEC_012
, //100
2488 SQ_ALU_VEC_021
, //101
2490 SQ_ALU_VEC_012
, //110
2491 SQ_ALU_VEC_012
}; //111
2493 const GLuint BANK_SWIZZLE_SCL
[8] = {SQ_ALU_SCL_210
, //000
2494 SQ_ALU_SCL_122
, //001
2495 SQ_ALU_SCL_122
, //010
2497 SQ_ALU_SCL_221
, //011
2498 SQ_ALU_SCL_212
, //100
2499 SQ_ALU_SCL_122
, //101
2501 SQ_ALU_SCL_122
, //110
2502 SQ_ALU_SCL_122
}; //111
2504 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
2508 int res_match
= (-1);
2509 int res_empty
= (-1);
2513 for (res
=3; res
>=0; res
--)
2515 if(pAsm
->hw_cfile_addr
[ res
] < 0)
2519 else if( (pAsm
->hw_cfile_addr
[res
] == (int)sel
)
2521 (pAsm
->hw_cfile_chan
[ res
] == (int) chan
) )
2529 // Read for this scalar component already reserved, nothing to do here.
2532 else if(res_empty
>= 0)
2534 pAsm
->hw_cfile_addr
[ res_empty
] = sel
;
2535 pAsm
->hw_cfile_chan
[ res_empty
] = chan
;
2539 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
2545 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
)
2547 if(pAsm
->hw_gpr
[cycle
][chan
] < 0)
2549 pAsm
->hw_gpr
[cycle
][chan
] = sel
;
2551 else if(pAsm
->hw_gpr
[cycle
][chan
] != (int)sel
)
2553 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
2560 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
2564 case SQ_ALU_SCL_210
:
2566 int table
[3] = {2, 1, 0};
2567 *pCycle
= table
[sel
];
2571 case SQ_ALU_SCL_122
:
2573 int table
[3] = {1, 2, 2};
2574 *pCycle
= table
[sel
];
2578 case SQ_ALU_SCL_212
:
2580 int table
[3] = {2, 1, 2};
2581 *pCycle
= table
[sel
];
2585 case SQ_ALU_SCL_221
:
2587 int table
[3] = {2, 2, 1};
2588 *pCycle
= table
[sel
];
2593 radeon_error("Bad Scalar bank swizzle value\n");
2600 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
)
2604 case SQ_ALU_VEC_012
:
2606 int table
[3] = {0, 1, 2};
2607 *pCycle
= table
[sel
];
2610 case SQ_ALU_VEC_021
:
2612 int table
[3] = {0, 2, 1};
2613 *pCycle
= table
[sel
];
2616 case SQ_ALU_VEC_120
:
2618 int table
[3] = {1, 2, 0};
2619 *pCycle
= table
[sel
];
2622 case SQ_ALU_VEC_102
:
2624 int table
[3] = {1, 0, 2};
2625 *pCycle
= table
[sel
];
2628 case SQ_ALU_VEC_201
:
2630 int table
[3] = {2, 0, 1};
2631 *pCycle
= table
[sel
];
2634 case SQ_ALU_VEC_210
:
2636 int table
[3] = {2, 1, 0};
2637 *pCycle
= table
[sel
];
2641 radeon_error("Bad Vec bank swizzle value\n");
2649 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
2650 R700ALUInstruction
* alu_instruction_ptr
)
2653 GLuint bank_swizzle
;
2654 GLuint const_count
= 0;
2663 BITS src_sel
[3] = {0,0,0};
2664 BITS src_chan
[3] = {0,0,0};
2665 BITS src_rel
[3] = {0,0,0};
2666 BITS src_neg
[3] = {0,0,0};
2669 GLuint number_of_operands
;
2671 if(8 == pAsm
->unAsic
)
2673 number_of_operands
= EG_GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2677 number_of_operands
= r700GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2680 for (src
=0; src
<number_of_operands
; src
++)
2682 get_src_properties(alu_instruction_ptr
,
2691 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
2692 (is_const( src_sel
[1] ) ? 2 : 0) +
2693 (is_const( src_sel
[2] ) ? 1 : 0) );
2695 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_SCL
[ swizzle_key
];
2697 for (src
=0; src
<number_of_operands
; src
++)
2699 sel
= src_sel
[src
];
2700 chan
= src_chan
[src
];
2701 rel
= src_rel
[src
];
2702 neg
= src_neg
[src
];
2704 if (is_const( sel
))
2706 // Any constant, including literal and inline constants
2709 if (is_cfile( sel
))
2711 reserve_cfile(pAsm
, sel
, chan
);
2717 for (src
=0; src
<number_of_operands
; src
++)
2719 sel
= src_sel
[src
];
2720 chan
= src_chan
[src
];
2721 rel
= src_rel
[src
];
2722 neg
= src_neg
[src
];
2726 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
2728 if( GL_FALSE
== cycle_for_scalar_bank_swizzle(bank_swizzle
, src
, &cycle
) )
2733 if(cycle
< const_count
)
2735 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
2746 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
2747 R700ALUInstruction
* alu_instruction_ptr
)
2750 GLuint bank_swizzle
;
2751 GLuint const_count
= 0;
2760 BITS src_sel
[3] = {0,0,0};
2761 BITS src_chan
[3] = {0,0,0};
2762 BITS src_rel
[3] = {0,0,0};
2763 BITS src_neg
[3] = {0,0,0};
2766 GLuint number_of_operands
;
2768 if(8 == pAsm
->unAsic
)
2770 number_of_operands
= EG_GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2774 number_of_operands
= r700GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2777 for (src
=0; src
<number_of_operands
; src
++)
2779 get_src_properties(alu_instruction_ptr
,
2788 swizzle_key
= ( (is_const( src_sel
[0] ) ? 4 : 0) +
2789 (is_const( src_sel
[1] ) ? 2 : 0) +
2790 (is_const( src_sel
[2] ) ? 1 : 0)
2793 alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
= BANK_SWIZZLE_VEC
[swizzle_key
];
2795 for (src
=0; src
<number_of_operands
; src
++)
2797 sel
= src_sel
[src
];
2798 chan
= src_chan
[src
];
2799 rel
= src_rel
[src
];
2800 neg
= src_neg
[src
];
2803 bank_swizzle
= alu_instruction_ptr
->m_Word1
.f
.bank_swizzle
;
2807 if( GL_FALSE
== cycle_for_vector_bank_swizzle(bank_swizzle
, src
, &cycle
) )
2813 (sel
== src_sel
[0]) &&
2814 (chan
== src_chan
[0]) )
2819 if( GL_FALSE
== reserve_gpr(pAsm
, sel
, chan
, cycle
) )
2825 else if( is_const(sel
) )
2831 if( GL_FALSE
== reserve_cfile(pAsm
, sel
, chan
) )
2842 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
)
2844 R700ALUInstruction
* alu_instruction_ptr
= NULL
;
2845 R700ALUInstructionHalfLiteral
* alu_instruction_ptr_hl
;
2846 R700ALUInstructionFullLiteral
* alu_instruction_ptr_fl
;
2848 GLuint number_of_scalar_operations
;
2849 GLboolean is_single_scalar_operation
;
2850 GLuint scalar_channel_index
;
2852 PVSSRC
* pcurrent_source
;
2853 int current_source_index
;
2854 GLuint contiguous_slots_needed
;
2856 GLboolean bSplitInst
;
2858 if(8 == pAsm
->unAsic
)
2860 uNumSrc
= EG_GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2864 uNumSrc
= r700GetNumOperands(pAsm
->D
.dst
.opcode
, pAsm
->D
.dst
.op3
);
2867 //GLuint channel_swizzle, j;
2868 //GLuint chan_counter[4] = {0, 0, 0, 0};
2869 //PVSSRC * pSource[3];
2870 bSplitInst
= GL_FALSE
;
2871 pAsm
->kcacheUsed
= 0;
2873 if (1 == pAsm
->D
.dst
.math
)
2875 is_single_scalar_operation
= GL_TRUE
;
2876 number_of_scalar_operations
= 1;
2880 is_single_scalar_operation
= GL_FALSE
;
2881 number_of_scalar_operations
= 4;
2883 /* current assembler doesn't do more than 1 register per source */
2885 /* check read port, only very preliminary algorithm, not count in
2886 src0/1 same comp case and prev slot repeat case; also not count relative
2887 addressing. TODO: improve performance. */
2888 for(j
=0; j
<uNumSrc
; j
++)
2890 pSource
[j
] = &(pAsm
->S
[j
].src
);
2892 for(scalar_channel_index
=0; scalar_channel_index
<4; scalar_channel_index
++)
2894 for(j
=0; j
<uNumSrc
; j
++)
2896 switch (scalar_channel_index
)
2898 case 0: channel_swizzle
= pSource
[j
]->swizzlex
; break;
2899 case 1: channel_swizzle
= pSource
[j
]->swizzley
; break;
2900 case 2: channel_swizzle
= pSource
[j
]->swizzlez
; break;
2901 case 3: channel_swizzle
= pSource
[j
]->swizzlew
; break;
2902 default: channel_swizzle
= SQ_SEL_MASK
; break;
2904 if ( ((pSource
[j
]->rtype
== SRC_REG_TEMPORARY
) ||
2905 (pSource
[j
]->rtype
== SRC_REG_GPR
))
2906 && (channel_swizzle
<= SQ_SEL_W
) )
2908 chan_counter
[channel_swizzle
]++;
2912 if( (chan_counter
[SQ_SEL_X
] > 3)
2913 || (chan_counter
[SQ_SEL_Y
] > 3)
2914 || (chan_counter
[SQ_SEL_Z
] > 3)
2915 || (chan_counter
[SQ_SEL_W
] > 3) ) /* each chan bank has only 3 ports. */
2917 bSplitInst
= GL_TRUE
;
2922 contiguous_slots_needed
= 0;
2924 if(!is_single_scalar_operation
)
2926 contiguous_slots_needed
= 4;
2929 contiguous_slots_needed
+= pAsm
->D2
.dst2
.literal_slots
;
2933 for (scalar_channel_index
=0;
2934 scalar_channel_index
< number_of_scalar_operations
;
2935 scalar_channel_index
++)
2937 if(scalar_channel_index
== (number_of_scalar_operations
-1))
2939 switch(pAsm
->D2
.dst2
.literal_slots
)
2942 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2943 Init_R700ALUInstruction(alu_instruction_ptr
);
2946 alu_instruction_ptr_hl
= (R700ALUInstructionHalfLiteral
*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral
);
2947 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl
, pAsm
->C
[0].f
, pAsm
->C
[1].f
);
2948 alu_instruction_ptr
= (R700ALUInstruction
*)alu_instruction_ptr_hl
;
2951 alu_instruction_ptr_fl
= (R700ALUInstructionFullLiteral
*) CALLOC_STRUCT(R700ALUInstructionFullLiteral
);
2952 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl
,pAsm
->C
[0].f
, pAsm
->C
[1].f
, pAsm
->C
[2].f
, pAsm
->C
[3].f
);
2953 alu_instruction_ptr
= (R700ALUInstruction
*)alu_instruction_ptr_fl
;
2959 alu_instruction_ptr
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
2960 Init_R700ALUInstruction(alu_instruction_ptr
);
2964 current_source_index
= 0;
2965 pcurrent_source
= &(pAsm
->S
[0].src
);
2967 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2968 current_source_index
,
2970 scalar_channel_index
,
2979 current_source_index
= 1;
2980 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
2982 if (GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
2983 current_source_index
,
2985 scalar_channel_index
,
2993 alu_instruction_ptr
->m_Word0
.f
.index_mode
= pAsm
->D2
.dst2
.index_mode
;
2995 if( (is_single_scalar_operation
== GL_TRUE
)
2996 || (GL_TRUE
== bSplitInst
) )
2998 alu_instruction_ptr
->m_Word0
.f
.last
= 1;
3002 alu_instruction_ptr
->m_Word0
.f
.last
= (scalar_channel_index
== 3) ? 1 : 0;
3005 alu_instruction_ptr
->m_Word0
.f
.pred_sel
= (pAsm
->D
.dst
.pred_inv
> 0) ? 1 : 0;
3006 if(1 == pAsm
->D
.dst
.predicated
)
3008 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x1;
3009 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x1;
3013 alu_instruction_ptr
->m_Word1_OP2
.f
.update_pred
= 0x0;
3014 alu_instruction_ptr
->m_Word1_OP2
.f
.update_execute_mask
= 0x0;
3018 if( (pAsm
->D
.dst
.rtype
== DST_REG_TEMPORARY
) ||
3019 (pAsm
->D
.dst
.rtype
== DST_REG_OUT
) )
3021 alu_instruction_ptr
->m_Word1
.f
.dst_gpr
= pAsm
->D
.dst
.reg
;
3025 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
3029 if ( ADDR_RELATIVE_A0
== addrmode_PVSDST(&(pAsm
->D
.dst
)) )
3031 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_RELATIVE
;
3035 alu_instruction_ptr
->m_Word1
.f
.dst_rel
= SQ_ABSOLUTE
;
3038 if ( is_single_scalar_operation
== GL_TRUE
)
3040 // Override scalar_channel_index since only one scalar value will be written
3041 if(pAsm
->D
.dst
.writex
)
3043 scalar_channel_index
= 0;
3045 else if(pAsm
->D
.dst
.writey
)
3047 scalar_channel_index
= 1;
3049 else if(pAsm
->D
.dst
.writez
)
3051 scalar_channel_index
= 2;
3053 else if(pAsm
->D
.dst
.writew
)
3055 scalar_channel_index
= 3;
3059 alu_instruction_ptr
->m_Word1
.f
.dst_chan
= scalar_channel_index
;
3061 alu_instruction_ptr
->m_Word1
.f
.clamp
= pAsm
->D2
.dst2
.SaturateMode
;
3063 if (pAsm
->D
.dst
.op3
)
3067 alu_instruction_ptr
->m_Word1_OP3
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
3069 //There's 3rd src for op3
3070 current_source_index
= 2;
3071 pcurrent_source
= &(pAsm
->S
[current_source_index
].src
);
3073 if ( GL_FALSE
== assemble_alu_src(alu_instruction_ptr
,
3074 current_source_index
,
3076 scalar_channel_index
,
3087 alu_instruction_ptr
->m_Word1_OP2
.f6
.alu_inst
= pAsm
->D
.dst
.opcode
;
3089 alu_instruction_ptr
->m_Word1_OP2
.f6
.src0_abs
= pAsm
->S
[0].src
.abs
;
3090 alu_instruction_ptr
->m_Word1_OP2
.f6
.src1_abs
= pAsm
->S
[1].src
.abs
;
3092 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
3093 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
3094 switch (scalar_channel_index
)
3097 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writex
;
3100 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writey
;
3103 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writez
;
3106 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= pAsm
->D
.dst
.writew
;
3109 alu_instruction_ptr
->m_Word1_OP2
.f6
.write_mask
= 1; //SQ_SEL_MASK;
3112 alu_instruction_ptr
->m_Word1_OP2
.f6
.omod
= SQ_ALU_OMOD_OFF
;
3116 alu_instruction_ptr
->m_Word1_OP2
.f
.alu_inst
= pAsm
->D
.dst
.opcode
;
3118 alu_instruction_ptr
->m_Word1_OP2
.f
.src0_abs
= pAsm
->S
[0].src
.abs
;
3119 alu_instruction_ptr
->m_Word1_OP2
.f
.src1_abs
= pAsm
->S
[1].src
.abs
;
3121 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
3122 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
3123 switch (scalar_channel_index
)
3126 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writex
;
3129 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writey
;
3132 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writez
;
3135 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= pAsm
->D
.dst
.writew
;
3138 alu_instruction_ptr
->m_Word1_OP2
.f
.write_mask
= 1; //SQ_SEL_MASK;
3141 alu_instruction_ptr
->m_Word1_OP2
.f
.omod
= SQ_ALU_OMOD_OFF
;
3145 if(GL_FALSE
== add_alu_instruction(pAsm
, alu_instruction_ptr
, contiguous_slots_needed
) )
3151 * Judge the type of current instruction, is it vector or scalar
3154 if (is_single_scalar_operation
)
3156 if(GL_FALSE
== check_scalar(pAsm
, alu_instruction_ptr
) )
3163 if(GL_FALSE
== check_vector(pAsm
, alu_instruction_ptr
) )
3169 contiguous_slots_needed
-= 1;
3175 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
)
3181 tmp
= gethelpr(pAsm
);
3183 // opcode tmp.x, a.x
3186 pAsm
->D
.dst
.opcode
= opcode
;
3187 pAsm
->D
.dst
.math
= 1;
3189 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3190 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3191 pAsm
->D
.dst
.reg
= tmp
;
3192 pAsm
->D
.dst
.writex
= 1;
3194 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3199 if( pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_RSQ
)
3200 pAsm
->S
[0].src
.abs
= 1;
3202 if ( GL_FALSE
== next_ins(pAsm
) )
3207 // Now replicate result to all necessary channels in destination
3208 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3210 if( GL_FALSE
== assemble_dst(pAsm
) )
3215 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3216 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3217 pAsm
->S
[0].src
.reg
= tmp
;
3219 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3220 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3222 if( GL_FALSE
== next_ins(pAsm
) )
3230 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
)
3234 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3236 if( GL_FALSE
== assemble_dst(pAsm
) )
3240 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3245 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
3246 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3248 if ( GL_FALSE
== next_ins(pAsm
) )
3256 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
)
3258 if( GL_FALSE
== checkop2(pAsm
) )
3263 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3265 if( GL_FALSE
== assemble_dst(pAsm
) )
3270 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3275 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3280 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_SUB
)
3282 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
3285 if( GL_FALSE
== next_ins(pAsm
) )
3293 GLboolean
assemble_ARL(r700_AssemblerBase
*pAsm
)
3294 { /* TODO: ar values dont' persist between clauses */
3295 if( GL_FALSE
== checkop1(pAsm
) )
3300 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOVA_FLOOR
;
3301 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3302 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3303 pAsm
->D
.dst
.reg
= 0;
3304 pAsm
->D
.dst
.writex
= 0;
3305 pAsm
->D
.dst
.writey
= 0;
3306 pAsm
->D
.dst
.writez
= 0;
3307 pAsm
->D
.dst
.writew
= 0;
3309 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3314 if( GL_FALSE
== next_ins(pAsm
) )
3322 GLboolean
assemble_BAD(char *opcode_str
)
3324 radeon_error("Not yet implemented instruction (%s)\n", opcode_str
);
3328 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
)
3332 if( GL_FALSE
== checkop3(pAsm
) )
3337 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_CNDGE
;
3338 pAsm
->D
.dst
.op3
= 1;
3342 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
3344 //OP3 has no support for write mask
3345 tmp
= gethelpr(pAsm
);
3347 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3348 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3349 pAsm
->D
.dst
.reg
= tmp
;
3351 nomask_PVSDST(&(pAsm
->D
.dst
));
3355 if( GL_FALSE
== assemble_dst(pAsm
) )
3361 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3366 if( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
3371 if( GL_FALSE
== assemble_src(pAsm
, 1, 2) )
3376 if ( GL_FALSE
== next_ins(pAsm
) )
3381 if (0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
3383 if( GL_FALSE
== assemble_dst(pAsm
) )
3388 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3391 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3392 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3393 pAsm
->S
[0].src
.reg
= tmp
;
3395 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3396 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3398 if( GL_FALSE
== next_ins(pAsm
) )
3407 GLboolean
assemble_TRIG(r700_AssemblerBase
*pAsm
, BITS opcode
)
3410 * r600 - trunc to -PI..PI range
3411 * r700 - normalize by dividing by 2PI
3418 tmp
= gethelpr(pAsm
);
3420 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3421 pAsm
->D
.dst
.op3
= 1;
3423 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3424 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3425 pAsm
->D
.dst
.reg
= tmp
;
3427 assemble_src(pAsm
, 0, -1);
3429 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
3430 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
3432 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
3433 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_Y
);
3435 pAsm
->D2
.dst2
.literal_slots
= 1;
3436 pAsm
->C
[0].f
= 1/(3.1415926535 * 2);
3437 pAsm
->C
[1].f
= 0.5f
;
3439 if ( GL_FALSE
== next_ins(pAsm
) )
3444 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
3446 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3447 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3448 pAsm
->D
.dst
.reg
= tmp
;
3449 pAsm
->D
.dst
.writex
= 1;
3451 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3452 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3453 pAsm
->S
[0].src
.reg
= tmp
;
3454 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3456 if(( GL_FALSE
== next_ins(pAsm
) ))
3460 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3461 pAsm
->D
.dst
.op3
= 1;
3463 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3464 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3465 pAsm
->D
.dst
.reg
= tmp
;
3467 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3468 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3469 pAsm
->S
[0].src
.reg
= tmp
;
3470 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3472 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
3473 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
3475 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
3476 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_Y
);
3478 pAsm
->D2
.dst2
.literal_slots
= 1;
3482 pAsm
->C
[0].f
= 3.1415926535897f
* 2.0f
;
3483 pAsm
->C
[1].f
= -3.1415926535897f
;
3487 pAsm
->C
[0].f
= 1.0f
;
3488 pAsm
->C
[1].f
= -0.5f
;
3491 if(( GL_FALSE
== next_ins(pAsm
) ))
3496 pAsm
->D
.dst
.opcode
= opcode
;
3497 pAsm
->D
.dst
.math
= 1;
3501 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3502 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3503 pAsm
->S
[0].src
.reg
= tmp
;
3504 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3505 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3509 //TODO - replicate if more channels set in WriteMask
3514 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
)
3516 if( GL_FALSE
== checkop2(pAsm
) )
3521 if(8 == pAsm
->unAsic
)
3523 pAsm
->D
.dst
.opcode
= EG_OP2_INST_DOT4
;
3527 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_DOT4
;
3530 if( GL_FALSE
== assemble_dst(pAsm
) )
3535 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3540 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3545 if(OPCODE_DP2
== pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
3547 zerocomp_PVSSRC(&(pAsm
->S
[0].src
),2);
3548 zerocomp_PVSSRC(&(pAsm
->S
[0].src
),3);
3549 zerocomp_PVSSRC(&(pAsm
->S
[1].src
),2);
3550 zerocomp_PVSSRC(&(pAsm
->S
[1].src
),3);
3552 else if(OPCODE_DP3
== pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
3554 zerocomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
3555 zerocomp_PVSSRC(&(pAsm
->S
[1].src
), 3);
3557 else if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_DPH
)
3559 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
3562 if ( GL_FALSE
== next_ins(pAsm
) )
3570 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
)
3572 if( GL_FALSE
== checkop2(pAsm
) )
3577 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
3579 if( GL_FALSE
== assemble_dst(pAsm
) )
3584 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3589 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
3594 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 0);
3595 onecomp_PVSSRC(&(pAsm
->S
[0].src
), 3);
3597 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 0);
3598 onecomp_PVSSRC(&(pAsm
->S
[1].src
), 2);
3600 if ( GL_FALSE
== next_ins(pAsm
) )
3608 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
)
3610 if(8 == pAsm
->unAsic
)
3612 return assemble_math_function(pAsm
, EG_OP2_INST_EXP_IEEE
);
3615 return assemble_math_function(pAsm
, SQ_OP2_INST_EXP_IEEE
);
3618 GLboolean
assemble_EXP(r700_AssemblerBase
*pAsm
)
3624 tmp
= gethelpr(pAsm
);
3629 if (pAsm
->pILInst
->DstReg
.WriteMask
& 0x1) {
3630 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3632 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3633 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3634 pAsm
->D
.dst
.reg
= tmp
;
3635 pAsm
->D
.dst
.writex
= 1;
3637 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3642 if( GL_FALSE
== next_ins(pAsm
) )
3647 if(8 == pAsm
->unAsic
)
3649 pAsm
->D
.dst
.opcode
= EG_OP2_INST_EXP_IEEE
;
3653 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3655 pAsm
->D
.dst
.math
= 1;
3657 if( GL_FALSE
== assemble_dst(pAsm
) )
3662 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3664 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3665 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
3666 pAsm
->S
[0].src
.reg
= tmp
;
3668 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
3669 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3671 if( GL_FALSE
== next_ins(pAsm
) )
3679 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 1) & 0x1) {
3680 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
3682 if( GL_FALSE
== assemble_dst(pAsm
) )
3687 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3692 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
3694 if( GL_FALSE
== next_ins(pAsm
) )
3702 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 2) & 0x1) {
3703 if(8 == pAsm
->unAsic
)
3705 pAsm
->D
.dst
.opcode
= EG_OP2_INST_EXP_IEEE
;
3709 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
3711 pAsm
->D
.dst
.math
= 1;
3713 if( GL_FALSE
== assemble_dst(pAsm
) )
3718 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3723 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
3725 if( GL_FALSE
== next_ins(pAsm
) )
3733 if ((pAsm
->pILInst
->DstReg
.WriteMask
>> 3) & 0x1) {
3734 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3736 if( GL_FALSE
== assemble_dst(pAsm
) )
3741 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
3743 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3744 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3745 pAsm
->S
[0].src
.reg
= tmp
;
3747 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
3748 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3750 if( GL_FALSE
== next_ins(pAsm
) )
3759 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
)
3763 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
3765 if ( GL_FALSE
== assemble_dst(pAsm
) )
3770 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3775 if ( GL_FALSE
== next_ins(pAsm
) )
3783 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
)
3785 if(8 == pAsm
->unAsic
)
3787 return assemble_math_function(pAsm
, EG_OP2_INST_FLT_TO_INT
);
3790 return assemble_math_function(pAsm
, SQ_OP2_INST_FLT_TO_INT
);
3793 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
)
3797 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
3799 if ( GL_FALSE
== assemble_dst(pAsm
) )
3804 if ( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
3809 if ( GL_FALSE
== next_ins(pAsm
) )
3817 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
, GLuint opcode
)
3819 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
3821 if(pILInst
->Opcode
== OPCODE_KIL
)
3824 pAsm
->D
.dst
.opcode
= opcode
;
3825 //pAsm->D.dst.math = 1;
3827 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3828 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3829 pAsm
->D
.dst
.reg
= 0;
3830 pAsm
->D
.dst
.writex
= 0;
3831 pAsm
->D
.dst
.writey
= 0;
3832 pAsm
->D
.dst
.writez
= 0;
3833 pAsm
->D
.dst
.writew
= 0;
3835 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3836 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3837 pAsm
->S
[0].src
.reg
= 0;
3838 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_0
);
3839 noneg_PVSSRC(&(pAsm
->S
[0].src
));
3841 if(pILInst
->Opcode
== OPCODE_KIL_NV
)
3843 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
3844 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
3845 pAsm
->S
[1].src
.reg
= 0;
3846 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_1
);
3847 neg_PVSSRC(&(pAsm
->S
[1].src
));
3851 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3858 if ( GL_FALSE
== next_ins(pAsm
) )
3863 /* Doc says KILL has to be last(end) ALU clause */
3864 pAsm
->pR700Shader
->killIsUsed
= GL_TRUE
;
3865 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
3870 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
)
3872 if(8 == pAsm
->unAsic
)
3874 return assemble_math_function(pAsm
, EG_OP2_INST_LOG_IEEE
);
3877 return assemble_math_function(pAsm
, SQ_OP2_INST_LOG_IEEE
);
3880 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
)
3884 if( GL_FALSE
== checkop3(pAsm
) )
3889 tmp
= gethelpr(pAsm
);
3891 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
3893 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3894 pAsm
->D
.dst
.reg
= tmp
;
3895 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3896 nomask_PVSDST(&(pAsm
->D
.dst
));
3899 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
3904 if ( GL_FALSE
== assemble_src(pAsm
, 2, 1) )
3909 neg_PVSSRC(&(pAsm
->S
[1].src
));
3911 if( GL_FALSE
== next_ins(pAsm
) )
3916 if(8 == pAsm
->unAsic
)
3918 pAsm
->D
.dst
.opcode
= EG_OP3_INST_MULADD
;
3922 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
3924 pAsm
->D
.dst
.op3
= 1;
3926 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3927 pAsm
->D
.dst
.reg
= tmp
;
3928 nomask_PVSDST(&(pAsm
->D
.dst
));
3929 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3931 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3932 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3933 pAsm
->S
[0].src
.reg
= tmp
;
3934 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3937 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
3942 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
3947 if( GL_FALSE
== next_ins(pAsm
) )
3952 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
3954 if( GL_FALSE
== assemble_dst(pAsm
) )
3959 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
3960 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
3961 pAsm
->S
[0].src
.reg
= tmp
;
3962 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
3964 if( GL_FALSE
== next_ins(pAsm
) )
3972 GLboolean
assemble_LOG(r700_AssemblerBase
*pAsm
)
3974 BITS tmp1
, tmp2
, tmp3
;
3978 tmp1
= gethelpr(pAsm
);
3979 tmp2
= gethelpr(pAsm
);
3980 tmp3
= gethelpr(pAsm
);
3982 // FIXME: The hardware can do fabs() directly on input
3983 // elements, but the compiler doesn't have the
3984 // capability to use that.
3986 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3988 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
3990 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
3991 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
3992 pAsm
->D
.dst
.reg
= tmp1
;
3993 pAsm
->D
.dst
.writex
= 1;
3995 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4000 pAsm
->S
[1].bits
= pAsm
->S
[0].bits
;
4001 flipneg_PVSSRC(&(pAsm
->S
[1].src
));
4003 if ( GL_FALSE
== next_ins(pAsm
) )
4010 // LG2 tmp2.x, tmp1.x
4011 // FLOOR tmp3.x, tmp2.x
4012 // MOV dst.x, tmp3.x
4013 // ADD tmp3.x, tmp2.x, -tmp3.x
4014 // EX2 dst.y, tmp3.x
4015 // MOV dst.z, tmp2.x
4018 // LG2 tmp2.x, tmp1.x
4019 // FLOOR tmp3.x, tmp2.x
4021 if(8 == pAsm
->unAsic
)
4023 pAsm
->D
.dst
.opcode
= EG_OP2_INST_LOG_IEEE
;
4027 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
4029 pAsm
->D
.dst
.math
= 1;
4031 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4032 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4033 pAsm
->D
.dst
.reg
= tmp2
;
4034 pAsm
->D
.dst
.writex
= 1;
4036 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4037 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4038 pAsm
->S
[0].src
.reg
= tmp1
;
4040 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4041 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4043 if( GL_FALSE
== next_ins(pAsm
) )
4048 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FLOOR
;
4050 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4051 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4052 pAsm
->D
.dst
.reg
= tmp3
;
4053 pAsm
->D
.dst
.writex
= 1;
4055 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4056 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4057 pAsm
->S
[0].src
.reg
= tmp2
;
4059 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4060 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4062 if( GL_FALSE
== next_ins(pAsm
) )
4067 // MOV dst.x, tmp3.x
4069 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4071 if( GL_FALSE
== assemble_dst(pAsm
) )
4076 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
4078 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4079 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4080 pAsm
->S
[0].src
.reg
= tmp3
;
4082 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4083 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4085 if( GL_FALSE
== next_ins(pAsm
) )
4090 // ADD tmp3.x, tmp2.x, -tmp3.x
4091 // EX2 dst.y, tmp3.x
4093 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
4095 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4096 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4097 pAsm
->D
.dst
.reg
= tmp3
;
4098 pAsm
->D
.dst
.writex
= 1;
4100 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4101 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4102 pAsm
->S
[0].src
.reg
= tmp2
;
4104 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4105 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4107 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4108 pAsm
->S
[1].src
.rtype
= DST_REG_TEMPORARY
;
4109 pAsm
->S
[1].src
.reg
= tmp3
;
4111 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
4112 neg_PVSSRC(&(pAsm
->S
[1].src
));
4114 if( GL_FALSE
== next_ins(pAsm
) )
4119 if(8 == pAsm
->unAsic
)
4121 pAsm
->D
.dst
.opcode
= EG_OP2_INST_EXP_IEEE
;
4125 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
4127 pAsm
->D
.dst
.math
= 1;
4129 if( GL_FALSE
== assemble_dst(pAsm
) )
4134 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
4136 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4137 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4138 pAsm
->S
[0].src
.reg
= tmp3
;
4140 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4141 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4143 if( GL_FALSE
== next_ins(pAsm
) )
4148 // MOV dst.z, tmp2.x
4150 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4152 if( GL_FALSE
== assemble_dst(pAsm
) )
4157 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writew
= 0;
4159 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4160 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4161 pAsm
->S
[0].src
.reg
= tmp2
;
4163 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4164 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4166 if( GL_FALSE
== next_ins(pAsm
) )
4173 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4175 if( GL_FALSE
== assemble_dst(pAsm
) )
4180 pAsm
->D
.dst
.writex
= pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= 0;
4182 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4183 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4184 pAsm
->S
[0].src
.reg
= tmp1
;
4186 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_1
);
4187 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4189 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_MAD: emit a three-operand MULADD for the current IL instruction
 * (pAsm->pILInst[pAsm->uiCurInst]).
 *
 * The opcode is EG_OP3_INST_MULADD when pAsm->unAsic is 8 and
 * SQ_OP3_INST_MULADD for the other case, and dst.op3 is set.
 *
 * bReplaceDst is raised when the IL destination is a PROGRAM_TEMPORARY whose
 * index equals the index of a PROGRAM_TEMPORARY source, or when the IL write
 * mask is not 0xF.  When it is raised the MULADD is pointed at a helper
 * register from gethelpr() and a trailing MOV copies that helper into the
 * destination set up by assemble_dst().
 *
 * Every checkop3/assemble_dst/assemble_src/next_ins result is compared
 * against GL_FALSE.
 */
4197 GLboolean
assemble_MAD(struct r700_AssemblerBase
*pAsm
)
4200 GLboolean bReplaceDst
= GL_FALSE
;
4201 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
4203 if( GL_FALSE
== checkop3(pAsm
) )
4208 if(8 == pAsm
->unAsic
)
4210 pAsm
->D
.dst
.opcode
= EG_OP3_INST_MULADD
;
4214 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4216 pAsm
->D
.dst
.op3
= 1;
/* Look for a temporary source that shares its index with the temporary destination. */
4220 if(PROGRAM_TEMPORARY
== pILInst
->DstReg
.File
)
4221 { /* TODO : more investigation on MAD src and dst using same register */
4222 for(ii
=0; ii
<3; ii
++)
4224 if( (PROGRAM_TEMPORARY
== pILInst
->SrcReg
[ii
].File
)
4225 && (pILInst
->DstReg
.Index
== pILInst
->SrcReg
[ii
].Index
) )
4227 bReplaceDst
= GL_TRUE
;
4232 if(0xF != pILInst
->DstReg
.WriteMask
)
4233 { /* OP3 has no support for write mask */
4234 bReplaceDst
= GL_TRUE
;
/* Redirect the MULADD result into a helper temporary. */
4237 if(GL_TRUE
== bReplaceDst
)
4239 tmp
= gethelpr(pAsm
);
4241 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4242 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4243 pAsm
->D
.dst
.reg
= tmp
;
4245 nomask_PVSDST(&(pAsm
->D
.dst
));
4249 if( GL_FALSE
== assemble_dst(pAsm
) )
/* The three IL sources are assembled in order with -1 as the last argument. */
4255 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4260 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4265 if( GL_FALSE
== assemble_src(pAsm
, 2, -1) )
4270 if ( GL_FALSE
== next_ins(pAsm
) )
/* Copy the helper temporary into the real destination with a plain MOV. */
4275 if (GL_TRUE
== bReplaceDst
)
4277 if( GL_FALSE
== assemble_dst(pAsm
) )
4282 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4285 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4286 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4287 pAsm
->S
[0].src
.reg
= tmp
;
4289 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4290 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
4292 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_LIT: expand the LIT instruction into a sequence of ALU operations.
 *
 * A helper register (tmp) is taken from gethelpr(), and the register/type of
 * the destination produced by assemble_dst() are saved in dstReg/dstType.
 * The visible steps are:
 *   - if x or w is written: MOV with every source channel selecting SQ_SEL_1,
 *     with the y and z writes cleared;
 *   - if y is written: MAX of the X-replicated source 0 against a source whose
 *     channels all select SQ_SEL_0, writing y only;
 *   - if z is written: LOG_CLAMPED of the Y-replicated source into z, then
 *     MUL_LIT into tmp.x, then EXP_IEEE of tmp.x into z.
 * Opcodes carrying an EG_ prefix are paired with `8 == pAsm->unAsic` tests.
 *
 * NOTE(review): dstType is assigned but not read in the code visible here.
 */
4302 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
)
4304 unsigned int dstReg
;
4305 unsigned int dstType
;
4307 int tmp
= gethelpr(pAsm
);
4309 if( GL_FALSE
== assemble_dst(pAsm
) )
4313 dstReg
= pAsm
->D
.dst
.reg
;
4314 dstType
= pAsm
->D
.dst
.rtype
;
4316 /* dst.xw, <- 1.0 */
4317 if( pAsm
->D
.dst
.writex
|| pAsm
->D
.dst
.writew
)
4319 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4324 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4325 pAsm
->D
.dst
.writey
= 0;
4326 pAsm
->D
.dst
.writez
= 0;
4327 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4328 pAsm
->S
[0].src
.reg
= tmp
;
4329 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4330 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4331 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_1
;
4332 pAsm
->S
[0].src
.swizzley
= SQ_SEL_1
;
4333 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_1
;
4334 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_1
;
4335 if( GL_FALSE
== next_ins(pAsm
) )
/* The destination is re-assembled before each later step, after the write flags were edited. */
4341 if( GL_FALSE
== assemble_dst(pAsm
) )
4346 if( pAsm
->D
.dst
.writey
) {
4348 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4353 /* dst.y = max(src.x, 0.0) */
4354 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
4355 pAsm
->D
.dst
.writex
= 0;
4356 pAsm
->D
.dst
.writey
= 1;
4357 pAsm
->D
.dst
.writez
= 0;
4358 pAsm
->D
.dst
.writew
= 0;
4359 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
4360 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4361 pAsm
->S
[1].src
.reg
= tmp
;
4362 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4363 noneg_PVSSRC(&(pAsm
->S
[1].src
));
4364 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
4365 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
4366 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
4367 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
4368 if( GL_FALSE
== next_ins(pAsm
) )
4374 if( GL_FALSE
== assemble_dst(pAsm
) )
4378 if ( pAsm
->D
.dst
.writez
) {
4380 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4385 /* dst.z = log(src.y) */
4386 if(8 == pAsm
->unAsic
)
4388 pAsm
->D
.dst
.opcode
= EG_OP2_INST_LOG_CLAMPED
;
4392 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_CLAMPED
;
4394 pAsm
->D
.dst
.math
= 1;
4395 pAsm
->D
.dst
.writex
= 0;
4396 pAsm
->D
.dst
.writey
= 0;
4397 pAsm
->D
.dst
.writez
= 1;
4398 pAsm
->D
.dst
.writew
= 0;
4399 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
, SQ_SEL_Y
);
4400 if( GL_FALSE
== next_ins(pAsm
) )
/* IL source 0 is assembled twice: with -1 and with 2 as the last argument; S[0] and S[2] are then re-swizzled. */
4405 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4410 if( GL_FALSE
== assemble_src(pAsm
, 0, 2) )
4415 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
4417 swizzleagain_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
, SQ_SEL_X
);
4419 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
4420 if(8 == pAsm
->unAsic
)
4422 pAsm
->D
.dst
.opcode
= EG_OP3_INST_MUL_LIT
;
4426 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MUL_LIT
;
4428 pAsm
->D
.dst
.math
= 1;
4429 pAsm
->D
.dst
.op3
= 1;
4430 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4431 pAsm
->D
.dst
.reg
= tmp
;
4432 pAsm
->D
.dst
.writex
= 1;
4433 pAsm
->D
.dst
.writey
= 0;
4434 pAsm
->D
.dst
.writez
= 0;
4435 pAsm
->D
.dst
.writew
= 0;
/* Source 1 reads the z channel of the saved destination register (dstReg) as a temporary. */
4438 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4439 pAsm
->S
[1].src
.reg
= dstReg
;
4440 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4441 noneg_PVSSRC(&(pAsm
->S
[1].src
));
4442 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_Z
;
4443 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Z
;
4444 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
4445 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_Z
;
4447 if( GL_FALSE
== next_ins(pAsm
) )
4452 /* dst.z = exp(tmp.x) */
4453 if( GL_FALSE
== assemble_dst(pAsm
) )
4457 if(8 == pAsm
->unAsic
)
4459 pAsm
->D
.dst
.opcode
= EG_OP2_INST_EXP_IEEE
;
4463 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
4465 pAsm
->D
.dst
.math
= 1;
4466 pAsm
->D
.dst
.writex
= 0;
4467 pAsm
->D
.dst
.writey
= 0;
4468 pAsm
->D
.dst
.writez
= 1;
4469 pAsm
->D
.dst
.writew
= 0;
4471 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4472 pAsm
->S
[0].src
.reg
= tmp
;
4473 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4474 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4475 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
4476 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
4477 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_X
;
4478 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_X
;
4480 if( GL_FALSE
== next_ins(pAsm
) )
4488 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
)
4490 if( GL_FALSE
== checkop2(pAsm
) )
4495 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MAX
;
4497 if( GL_FALSE
== assemble_dst(pAsm
) )
4502 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4507 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4512 if( GL_FALSE
== next_ins(pAsm
) )
4520 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
)
4522 if( GL_FALSE
== checkop2(pAsm
) )
4527 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MIN
;
4529 if( GL_FALSE
== assemble_dst(pAsm
) )
4534 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4539 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4544 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_MOV: emit SQ_OP2_INST_MOV for the current IL instruction.
 * The destination is assembled first, then IL source 0 (with -1 as the last
 * argument), and next_ins() finishes the instruction; each result is
 * compared against GL_FALSE.
 */
4552 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
)
4556 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4558 if (GL_FALSE
== assemble_dst(pAsm
))
4563 if (GL_FALSE
== assemble_src(pAsm
, 0, -1))
4568 if ( GL_FALSE
== next_ins(pAsm
) )
4576 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
)
4578 if( GL_FALSE
== checkop2(pAsm
) )
4583 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4585 if( GL_FALSE
== assemble_dst(pAsm
) )
4590 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4595 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4600 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_POW: expand POW into LOG, MUL, EXP and a final MOV, all routed
 * through one helper register obtained from gethelpr().
 *
 *   1. LOG_IEEE of IL source 0 into tmp;
 *   2. MUL of tmp.x with IL source 1 into tmp;
 *   3. EXP_IEEE of tmp.x into tmp;
 *   4. MOV of tmp.x into the destination set up by assemble_dst().
 *
 * The LOG/EXP opcodes come in EG_ and SQ_ variants selected around the
 * `8 == pAsm->unAsic` tests.
 */
4608 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
)
4614 tmp
= gethelpr(pAsm
);
4616 // LG2 tmp.x, a.swizzle
4617 if(8 == pAsm
->unAsic
)
4619 pAsm
->D
.dst
.opcode
= EG_OP2_INST_LOG_IEEE
;
4623 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_LOG_IEEE
;
4625 pAsm
->D
.dst
.math
= 1;
4627 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4628 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4629 pAsm
->D
.dst
.reg
= tmp
;
4630 nomask_PVSDST(&(pAsm
->D
.dst
));
4632 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4637 if( GL_FALSE
== next_ins(pAsm
) )
4642 // MUL tmp.x, tmp.x, b.swizzle
4643 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
4645 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4646 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4647 pAsm
->D
.dst
.reg
= tmp
;
4648 nomask_PVSDST(&(pAsm
->D
.dst
));
4650 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4651 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4652 pAsm
->S
[0].src
.reg
= tmp
;
4653 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4654 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4656 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4661 if( GL_FALSE
== next_ins(pAsm
) )
4666 // EX2 dst.mask, tmp.x
4668 if(8 == pAsm
->unAsic
)
4670 pAsm
->D
.dst
.opcode
= EG_OP2_INST_EXP_IEEE
;
4674 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_EXP_IEEE
;
4676 pAsm
->D
.dst
.math
= 1;
/* Despite the "dst.mask" wording above, the EXP result is written to tmp here. */
4678 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4679 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4680 pAsm
->D
.dst
.reg
= tmp
;
4681 nomask_PVSDST(&(pAsm
->D
.dst
));
4683 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4684 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4685 pAsm
->S
[0].src
.reg
= tmp
;
4686 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4687 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4689 if( GL_FALSE
== next_ins(pAsm
) )
4694 // Now replicate result to all necessary channels in destination
4695 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
4697 if( GL_FALSE
== assemble_dst(pAsm
) )
4702 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
/* NOTE(review): a DST_REG_* constant is stored in a source rtype here; the other sources in this function use SRC_REG_TEMPORARY - confirm the two are interchangeable. */
4703 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
4704 pAsm
->S
[0].src
.reg
= tmp
;
4706 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4707 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4709 if( GL_FALSE
== next_ins(pAsm
) )
4717 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
)
4719 if(8 == pAsm
->unAsic
)
4721 return assemble_math_function(pAsm
, EG_OP2_INST_RECIP_IEEE
);
4724 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIP_IEEE
);
4727 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
)
4729 if(8 == pAsm
->unAsic
)
4731 return assemble_math_function(pAsm
, EG_OP2_INST_RECIPSQRT_IEEE
);
4734 return assemble_math_function(pAsm
, SQ_OP2_INST_RECIPSQRT_IEEE
);
/*
 * assemble_SCS: build the sine/cosine pair through a helper register (tmp).
 *
 * Visible sequence:
 *   1. MULADD into tmp of IL source 0 with two literal operands
 *      (C[0] = 1/(2*pi), C[1] = 0.5);
 *   2. FRACT of tmp.x back into tmp.x;
 *   3. a second MULADD into tmp with a second literal pair;
 *   4. COS of tmp.x with the y write cleared;
 *   5. SIN of tmp.x with the x write cleared.
 *
 * NOTE(review): both MULADDs use SQ_OP3_INST_MULADD with no
 * `8 == pAsm->unAsic` test in view, unlike the COS/SIN opcodes below and the
 * other MULADD users in this file; the result of assemble_src() is also not
 * checked here.  Confirm both are intended.
 */
4737 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
)
4743 tmp
= gethelpr(pAsm
);
4745 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4746 pAsm
->D
.dst
.op3
= 1;
4748 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4749 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4750 pAsm
->D
.dst
.reg
= tmp
;
4752 assemble_src(pAsm
, 0, -1);
/* Sources 1 and 2 are literal operands selecting the X and Y literal channels. */
4754 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
4755 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
4757 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
4758 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_Y
);
4760 pAsm
->D2
.dst2
.literal_slots
= 1;
4761 pAsm
->C
[0].f
= 1/(3.1415926535 * 2);
4762 pAsm
->C
[1].f
= 0.5F
;
4764 if ( GL_FALSE
== next_ins(pAsm
) )
4769 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_FRACT
;
4771 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4772 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4773 pAsm
->D
.dst
.reg
= tmp
;
4774 pAsm
->D
.dst
.writex
= 1;
4776 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4777 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4778 pAsm
->S
[0].src
.reg
= tmp
;
4779 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4781 if(( GL_FALSE
== next_ins(pAsm
) ))
4785 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
4786 pAsm
->D
.dst
.op3
= 1;
4788 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4789 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4790 pAsm
->D
.dst
.reg
= tmp
;
4792 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4793 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4794 pAsm
->S
[0].src
.reg
= tmp
;
4795 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4797 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
4798 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_X
);
4800 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
4801 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_Y
);
4803 pAsm
->D2
.dst2
.literal_slots
= 1;
/* Two alternative literal pairs follow: (2*pi, -pi) and (1.0, -0.5).  NOTE(review): the condition choosing between them is not visible here - confirm. */
4806 pAsm
->C
[0].f
= 3.1415926535897f
* 2.0f
;
4807 pAsm
->C
[1].f
= -3.1415926535897f
;
4809 pAsm
->C
[0].f
= 1.0f
;
4810 pAsm
->C
[1].f
= -0.5f
;
4813 if(( GL_FALSE
== next_ins(pAsm
) ))
/* Cosine of tmp.x; the y write is cleared. */
4819 if(8 == pAsm
->unAsic
)
4821 pAsm
->D
.dst
.opcode
= EG_OP2_INST_COS
;
4825 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_COS
;
4827 pAsm
->D
.dst
.math
= 1;
4831 pAsm
->D
.dst
.writey
= 0;
4833 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4834 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4835 pAsm
->S
[0].src
.reg
= tmp
;
4836 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4837 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4839 if ( GL_FALSE
== next_ins(pAsm
) )
/* Sine of tmp.x; the x write is cleared. */
4845 if(8 == pAsm
->unAsic
)
4847 pAsm
->D
.dst
.opcode
= EG_OP2_INST_SIN
;
4851 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SIN
;
4853 pAsm
->D
.dst
.math
= 1;
4857 pAsm
->D
.dst
.writex
= 0;
4859 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4860 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4861 pAsm
->S
[0].src
.reg
= tmp
;
4862 setswizzle_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_X
);
4863 noneg_PVSSRC(&(pAsm
->S
[0].src
));
4865 if( GL_FALSE
== next_ins(pAsm
) )
4873 GLboolean
assemble_LOGIC(r700_AssemblerBase
*pAsm
, BITS opcode
)
4875 if( GL_FALSE
== checkop2(pAsm
) )
4880 pAsm
->D
.dst
.opcode
= opcode
;
4881 //pAsm->D.dst.math = 1;
4883 if( GL_FALSE
== assemble_dst(pAsm
) )
4888 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4893 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4898 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_LOGIC_PRED: emit a predicated ALU instruction with the opcode
 * supplied by the caller.
 *
 * The instruction is flagged math and predicated and writes only the x
 * channel of the temporary pAsm->uHelpReg.  Source 0 is the temporary at
 * last_cond_register + starting_temp_register_number, with its x swizzle
 * taken from the low three bits of the IL DstReg.CondSwizzle.  Source 1 is
 * pAsm->uHelpReg with all four channels selecting SQ_SEL_0.
 */
4906 GLboolean
assemble_LOGIC_PRED(r700_AssemblerBase
*pAsm
, BITS opcode
)
4908 struct prog_instruction
*pILInst
= &(pAsm
->pILInst
[pAsm
->uiCurInst
]);
4910 pAsm
->D
.dst
.opcode
= opcode
;
4911 pAsm
->D
.dst
.math
= 1;
4912 pAsm
->D
.dst
.predicated
= 1;
4914 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
4915 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
4916 pAsm
->D
.dst
.reg
= pAsm
->uHelpReg
;
4917 pAsm
->D
.dst
.writex
= 1;
4918 pAsm
->D
.dst
.writey
= pAsm
->D
.dst
.writez
= pAsm
->D
.dst
.writew
= 0;
/* Source 0: the register holding the last condition value. */
4920 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
4921 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
4922 pAsm
->S
[0].src
.reg
= pAsm
->last_cond_register
+ pAsm
->starting_temp_register_number
;
4923 pAsm
->S
[0].src
.swizzlex
= pILInst
->DstReg
.CondSwizzle
& 0x7;
4924 noneg_PVSSRC(&(pAsm
->S
[0].src
));
/* Source 1: every channel selects SQ_SEL_0. */
4926 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
4927 pAsm
->S
[1].src
.reg
= pAsm
->uHelpReg
;
4928 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
4929 noneg_PVSSRC(&(pAsm
->S
[1].src
));
4930 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_0
;
4931 pAsm
->S
[1].src
.swizzley
= SQ_SEL_0
;
4932 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_0
;
4933 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_0
;
4935 if( GL_FALSE
== next_ins(pAsm
) )
4943 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
)
4945 if( GL_FALSE
== checkop2(pAsm
) )
4950 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGE
;
4952 if( GL_FALSE
== assemble_dst(pAsm
) )
4957 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
4962 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
4967 if( GL_FALSE
== next_ins(pAsm
) )
4975 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
)
4977 if( GL_FALSE
== checkop2(pAsm
) )
4982 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_SETGT
;
4984 if( GL_FALSE
== assemble_dst(pAsm
) )
4989 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
4994 if( GL_FALSE
== assemble_src(pAsm
, 1, 0) )
4999 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_SSG: compute the sign of the source with two CNDGT (op3)
 * instructions and one helper register (tmp):
 *   1. tmp = (src > 0 ? 1 : src)
 *   2. dst = (-tmp > 0 ? -1 : tmp)
 * as spelled out by the step comments below.
 *
 * NOTE(review): SQ_OP3_INST_CNDGT is used with no `8 == pAsm->unAsic` test
 * in view, unlike other op3 users in this file - confirm.
 */
5007 GLboolean
assemble_SSG(r700_AssemblerBase
*pAsm
)
5011 GLuint tmp
= gethelpr(pAsm
);
5012 /* tmp = (src > 0 ? 1 : src) */
5013 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_CNDGT
;
5014 pAsm
->D
.dst
.op3
= 1;
5015 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5016 pAsm
->D
.dst
.reg
= tmp
;
5018 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
5023 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_1
);
/* IL source 0 is assembled a second time, now with 2 as the last argument. */
5025 if( GL_FALSE
== assemble_src(pAsm
, 0, 2) )
5030 if( GL_FALSE
== next_ins(pAsm
) )
5035 /* dst = (-tmp > 0 ? -1 : tmp) */
5036 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_CNDGT
;
5037 pAsm
->D
.dst
.op3
= 1;
5039 if( GL_FALSE
== assemble_dst(pAsm
) )
5044 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5045 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
5046 pAsm
->S
[0].src
.reg
= tmp
;
5047 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
5048 neg_PVSSRC(&(pAsm
->S
[0].src
));
5050 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_1
);
5051 neg_PVSSRC(&(pAsm
->S
[1].src
));
5053 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
5054 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
5055 pAsm
->S
[2].src
.reg
= tmp
;
5056 noswizzle_PVSSRC(&(pAsm
->S
[2].src
));
5058 if( GL_FALSE
== next_ins(pAsm
) )
/* assemble_STP: handler entry point for STP.  NOTE(review): no statements of the body are visible here - confirm what it does. */
5066 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
)
/*
 * assemble_TEX: assemble a texture instruction for the current IL
 * instruction.
 *
 * Visible stages:
 *   - classify the file of IL source 0; constant-like files set src_const,
 *     which leads to mov_temp(pAsm, 0) and need_barrier being raised;
 *   - OPCODE_TXP: RECIP of source w into a helper's w, then MUL of the
 *     source by that value into the helper's xyz; the helper is recorded in
 *     aArgSubst[1];
 *   - TEXTURE_CUBE_INDEX targets: CUBE, RECIP, MULADD and MOV through two
 *     helpers, with the first helper recorded in aArgSubst[1];
 *   - choose the texture opcode from the IL opcode (and TexShadow);
 *   - set is_tex / need_tex_barrier, point source 1 at the sampler unit,
 *     then run tex_dst(), tex_src() and next_ins();
 *   - for TexShadow == 1 append a saturated ADD of the destination with
 *     shadow_regs[TexSrcUnit].
 */
5071 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
)
5073 GLboolean src_const
;
5074 GLboolean need_barrier
= GL_FALSE
;
5078 switch (pAsm
->pILInst
[pAsm
->uiCurInst
].SrcReg
[0].File
)
5080 case PROGRAM_UNIFORM
:
5081 case PROGRAM_CONSTANT
:
5082 case PROGRAM_LOCAL_PARAM
:
5083 case PROGRAM_ENV_PARAM
:
5084 case PROGRAM_STATE_VAR
:
5085 src_const
= GL_TRUE
;
5087 case PROGRAM_TEMPORARY
:
5090 src_const
= GL_FALSE
;
5094 if (GL_TRUE
== src_const
)
5096 if ( GL_FALSE
== mov_temp(pAsm
, 0) )
5098 need_barrier
= GL_TRUE
;
/* Projective lookup: tmp.w = RECIP(src.w), then tmp.xyz = src * tmp.w. */
5101 if (pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
5103 GLuint tmp
= gethelpr(pAsm
);
5104 if(8 == pAsm
->unAsic
)
5106 pAsm
->D
.dst
.opcode
= EG_OP2_INST_RECIP_IEEE
;
5110 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
5112 pAsm
->D
.dst
.math
= 1;
5113 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5114 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5115 pAsm
->D
.dst
.reg
= tmp
;
5116 pAsm
->D
.dst
.writew
= 1;
5118 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
5122 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
, SQ_SEL_W
);
5123 if( GL_FALSE
== next_ins(pAsm
) )
5128 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
5129 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5130 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5131 pAsm
->D
.dst
.reg
= tmp
;
5132 pAsm
->D
.dst
.writex
= 1;
5133 pAsm
->D
.dst
.writey
= 1;
5134 pAsm
->D
.dst
.writez
= 1;
5135 pAsm
->D
.dst
.writew
= 0;
5137 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
5141 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
5142 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
5143 pAsm
->S
[1].src
.reg
= tmp
;
5144 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_W
);
5146 if( GL_FALSE
== next_ins(pAsm
) )
5151 pAsm
->aArgSubst
[1] = tmp
;
5152 need_barrier
= GL_TRUE
;
/* Cube-map targets get their coordinates preprocessed through two helper registers. */
5155 if (pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
5157 GLuint tmp1
= gethelpr(pAsm
);
5158 GLuint tmp2
= gethelpr(pAsm
);
5160 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
5161 if(8 == pAsm
->unAsic
)
5163 pAsm
->D
.dst
.opcode
= EG_OP2_INST_CUBE
;
5167 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_CUBE
;
5169 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5170 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5171 pAsm
->D
.dst
.reg
= tmp1
;
5172 nomask_PVSDST(&(pAsm
->D
.dst
));
5174 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
5179 if( GL_FALSE
== assemble_src(pAsm
, 0, 1) )
5184 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
);
5185 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_X
, SQ_SEL_Z
, SQ_SEL_Z
);
5187 if( GL_FALSE
== next_ins(pAsm
) )
5192 /* tmp1.z = RCP_e(|tmp1.z|) */
5193 if(8 == pAsm
->unAsic
)
5195 pAsm
->D
.dst
.opcode
= EG_OP2_INST_RECIP_IEEE
;
5199 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_RECIP_IEEE
;
5201 pAsm
->D
.dst
.math
= 1;
5202 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5203 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5204 pAsm
->D
.dst
.reg
= tmp1
;
5205 pAsm
->D
.dst
.writez
= 1;
5207 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5208 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
5209 pAsm
->S
[0].src
.reg
= tmp1
;
5210 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Z
;
5211 pAsm
->S
[0].src
.abs
= 1;
5215 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
5216 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
5217 * muladd has no writemask, have to use another temp
5219 if(8 == pAsm
->unAsic
)
5221 pAsm
->D
.dst
.opcode
= EG_OP3_INST_MULADD
;
5225 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
5227 pAsm
->D
.dst
.op3
= 1;
5228 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5229 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5230 pAsm
->D
.dst
.reg
= tmp2
;
5232 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5233 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
5234 pAsm
->S
[0].src
.reg
= tmp1
;
5235 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
5236 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
5237 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
5238 pAsm
->S
[1].src
.reg
= tmp1
;
5239 setswizzle_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
);
5240 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
), ADDR_ABSOLUTE
);
5241 /* immediate c 1.5 */
5242 pAsm
->D2
.dst2
.literal_slots
= 1;
5243 pAsm
->C
[0].f
= 1.5F
;
5244 pAsm
->S
[2].src
.rtype
= SRC_REC_LITERAL
;
5245 pAsm
->S
[2].src
.reg
= tmp1
;
5246 setswizzle_PVSSRC(&(pAsm
->S
[2].src
), SQ_SEL_X
);
5250 /* tmp1.xy = temp2.xy */
5251 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
5252 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5253 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5254 pAsm
->D
.dst
.reg
= tmp1
;
5255 pAsm
->D
.dst
.writex
= 1;
5256 pAsm
->D
.dst
.writey
= 1;
5257 pAsm
->D
.dst
.writez
= 0;
5258 pAsm
->D
.dst
.writew
= 0;
5260 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5261 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
5262 pAsm
->S
[0].src
.reg
= tmp2
;
5263 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
5266 pAsm
->aArgSubst
[1] = tmp1
;
5267 need_barrier
= GL_TRUE
;
/* Pick the texture opcode from the IL opcode. */
5271 switch(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
)
5274 /* will these need WQM(1) on CF inst ? */
5275 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_GET_GRADIENTS_H
;
5278 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_GET_GRADIENTS_V
;
5281 /* this should actually be SAMPLE_LB but that needs bias to be
5282 * embedded in the instruction - cant do here */
5283 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE_L
;
5286 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE_L
;
5289 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexShadow
== 1)
5290 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE_C
;
5292 pAsm
->D
.dst
.opcode
= SQ_TEX_INST_SAMPLE
;
/* NOTE(review): the is_tex / need_barrier sequence below appears twice - confirm whether both copies are active. */
5295 pAsm
->is_tex
= GL_TRUE
;
5296 if ( GL_TRUE
== need_barrier
)
5298 pAsm
->is_tex
= GL_TRUE
;
5299 if ( GL_TRUE
== need_barrier
)
5301 pAsm
->need_tex_barrier
= GL_TRUE
;
5303 // Set src1 to tex unit id
5304 pAsm
->S
[1].src
.reg
= pAsm
->SamplerUnits
[pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcUnit
];
5305 pAsm
->S
[1].src
.rtype
= SRC_REG_TEMPORARY
;
5307 //No sw info from mesa compiler, so hard code here.
5308 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
5309 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
5310 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
5311 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
5313 if( GL_FALSE
== tex_dst(pAsm
) )
5318 if( GL_FALSE
== tex_src(pAsm
) )
5323 if(pAsm
->pILInst
[pAsm
->uiCurInst
].Opcode
== OPCODE_TXP
)
5325 /* hopefully did swizzles before */
5326 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
5329 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcTarget
== TEXTURE_CUBE_INDEX
)
5331 /* SAMPLE dst, tmp.yxwy, CUBE */
5332 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_Y
;
5333 pAsm
->S
[0].src
.swizzley
= SQ_SEL_X
;
5334 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_W
;
5335 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_Y
;
5338 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexShadow
== 1)
5340 /* compare value goes to w chan ? */
5341 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_Z
;
5344 if ( GL_FALSE
== next_ins(pAsm
) )
5349 /* add ARB shadow ambient but clamp to 0..1 */
5350 if(pAsm
->pILInst
[pAsm
->uiCurInst
].TexShadow
== 1)
5352 /* ADD_SAT dst, dst, ambient[texunit] */
5353 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_ADD
;
5355 if( GL_FALSE
== assemble_dst(pAsm
) )
5359 pAsm
->D2
.dst2
.SaturateMode
= 1;
5361 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
5362 pAsm
->S
[0].src
.reg
= pAsm
->D
.dst
.reg
;
5363 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
5364 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5366 pAsm
->S
[1].src
.rtype
= SRC_REG_CONSTANT
;
5367 pAsm
->S
[1].src
.reg
= pAsm
->shadow_regs
[pAsm
->pILInst
[pAsm
->uiCurInst
].TexSrcUnit
];
5368 noswizzle_PVSSRC(&(pAsm
->S
[1].src
));
5369 noneg_PVSSRC(&(pAsm
->S
[1].src
));
5371 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * assemble_XPD: cross product built from a MUL followed by a MULADD.
 *
 *   1. tmp1 = src0.zxy0 * src1.yzx0              (MUL, all channels)
 *   2. dst  = src0.yzx0 * src1.zxy0 + (-tmp1)    (MULADD, op3)
 *
 * When the IL write mask is not 0xF the MULADD is pointed at a second helper
 * (tmp2) and a trailing MOV copies tmp2 into the destination set up by
 * assemble_dst().
 */
5381 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
)
5386 if( GL_FALSE
== checkop2(pAsm
) )
5391 tmp1
= gethelpr(pAsm
);
5393 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MUL
;
5395 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5396 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5397 pAsm
->D
.dst
.reg
= tmp1
;
5398 nomask_PVSDST(&(pAsm
->D
.dst
));
5400 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
5405 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
5410 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
5411 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
5413 if( GL_FALSE
== next_ins(pAsm
) )
5418 if(8 == pAsm
->unAsic
)
5420 pAsm
->D
.dst
.opcode
= EG_OP3_INST_MULADD
;
5424 pAsm
->D
.dst
.opcode
= SQ_OP3_INST_MULADD
;
5426 pAsm
->D
.dst
.op3
= 1;
/* A partial write mask sends the MULADD result to tmp2 instead of the real destination. */
5428 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
5430 tmp2
= gethelpr(pAsm
);
5432 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
5433 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
5434 pAsm
->D
.dst
.reg
= tmp2
;
5436 nomask_PVSDST(&(pAsm
->D
.dst
));
5440 if( GL_FALSE
== assemble_dst(pAsm
) )
5446 if( GL_FALSE
== assemble_src(pAsm
, 0, -1) )
5451 if( GL_FALSE
== assemble_src(pAsm
, 1, -1) )
5456 swizzleagain_PVSSRC(&(pAsm
->S
[0].src
), SQ_SEL_Y
, SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_0
);
5457 swizzleagain_PVSSRC(&(pAsm
->S
[1].src
), SQ_SEL_Z
, SQ_SEL_X
, SQ_SEL_Y
, SQ_SEL_0
);
5459 // result1 + (neg) result0
5460 setaddrmode_PVSSRC(&(pAsm
->S
[2].src
),ADDR_ABSOLUTE
);
5461 pAsm
->S
[2].src
.rtype
= SRC_REG_TEMPORARY
;
5462 pAsm
->S
[2].src
.reg
= tmp1
;
5464 neg_PVSSRC(&(pAsm
->S
[2].src
));
5465 noswizzle_PVSSRC(&(pAsm
->S
[2].src
));
5467 if( GL_FALSE
== next_ins(pAsm
) )
5473 if(0xF != pAsm
->pILInst
[pAsm
->uiCurInst
].DstReg
.WriteMask
)
5475 if( GL_FALSE
== assemble_dst(pAsm
) )
5480 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
5482 // Use tmp as source
5483 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
5484 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
5485 pAsm
->S
[0].src
.reg
= tmp2
;
5487 noneg_PVSSRC(&(pAsm
->S
[0].src
));
5488 noswizzle_PVSSRC(&(pAsm
->S
[0].src
));
5490 if( GL_FALSE
== next_ins(pAsm
) )
/* assemble_EXPORT: handler entry point for EXPORT.  NOTE(review): no statements of the body are visible here - confirm what it does. */
5499 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
)
/*
 * decreaseCurrent: lower the `current` counter of the call-stack entry
 * selected by pAsm->CALLSP.  Four alternatives are visible, subtracting
 * 1, 4, 4 and 1.
 * NOTE(review): presumably chosen by uReason; the selecting labels are not
 * visible here - confirm.
 */
5504 static inline void decreaseCurrent(r700_AssemblerBase
*pAsm
, GLuint uReason
)
5509 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
--;
5512 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 4;
5515 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 4;
5518 /* TODO : for 16 vp asic, should -= 2; */
5519 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
-= 1;
/*
 * checkStackDepth: track stack usage for the call-stack entry selected by
 * pAsm->CALLSP.
 *
 * With bCheckMaxOnly == GL_TRUE only `max` is touched: it is raised to
 * current + 1 or current + 4 when that value exceeds it, and `current`
 * itself is not assigned in those branches.  In the remaining visible code
 * `current` is raised by 1, 4, 4 or 1 and `max` is then lifted to `current`
 * when `current` exceeds it.
 * NOTE(review): presumably the increments are chosen by uReason; the
 * selecting labels are not visible here - confirm.
 */
5524 static inline void checkStackDepth(r700_AssemblerBase
*pAsm
, GLuint uReason
, GLboolean bCheckMaxOnly
)
5526 if(GL_TRUE
== bCheckMaxOnly
)
5531 if((pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 1)
5532 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5534 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
5535 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 1;
5539 if((pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 4)
5540 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5542 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
5543 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+ 4;
/* From here on `current` itself is advanced. */
5553 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
++;
5556 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 4;
5559 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 4;
5562 /* TODO : for 16 vp asic, should += 2; */
5563 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
+= 1;
/* Keep the high-water mark in step with `current`. */
5567 if(pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
5568 > pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
5570 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
=
5571 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
/*
 * jumpToOffest: append a control-flow JUMP instruction.
 *
 * pAsm   - assembler state; add_cf_instruction() is called first and the
 *          fields are written through cf_current_cf_clause_ptr.
 * pops   - stored in the pop_count field.
 * offset - added to the clause's m_uIndex to form the address in Word0.
 *
 * When pAsm->unAsic is 8 the Word1 fields are packed with SETfield() using
 * the EG_CF_WORD1__* shifts and masks; the other visible code fills the
 * m_Word1.f bitfields directly, with cf_inst = SQ_CF_INST_JUMP,
 * cond = SQ_CF_COND_ACTIVE and barrier = 1.
 * NOTE(review): the value arguments of the SETfield() calls are not visible
 * here - confirm they match the bitfield path.
 */
5575 GLboolean
jumpToOffest(r700_AssemblerBase
*pAsm
, GLuint pops
, GLint offset
)
5577 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5582 if(8 == pAsm
->unAsic
)
5584 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5586 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
5587 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5589 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
5590 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5592 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
5593 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5595 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
5596 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5598 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
5599 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5601 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
5602 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5604 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
5605 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5607 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
5608 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5610 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
/* Bitfield form of the same Word1 setup. */
5614 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= pops
;
5615 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5616 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5618 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5619 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5620 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_JUMP
;
5621 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5623 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* The jump target is expressed relative to this CF instruction's own index. */
5626 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ offset
;
/*
 * pops: append a control-flow POP instruction.
 *
 * pAsm - assembler state; add_cf_instruction() is called first and the
 *        fields are written through cf_current_cf_clause_ptr.
 * pops - stored in the pop_count field (the parameter shares the function's
 *        name).
 *
 * When pAsm->unAsic is 8 the Word1 fields are packed with SETfield() using
 * the EG_CF_WORD1__* shifts and masks; the other visible code fills the
 * m_Word1.f bitfields directly, with cf_inst = SQ_CF_INST_POP,
 * cond = SQ_CF_COND_ACTIVE and barrier = 1.  The Word0 address is set to
 * this instruction's m_uIndex + 1.
 * NOTE(review): the value arguments of the SETfield() calls are not visible
 * here - confirm they match the bitfield path.
 */
5631 GLboolean
pops(r700_AssemblerBase
*pAsm
, GLuint pops
)
5633 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5638 if(8 == pAsm
->unAsic
)
5640 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5642 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
5643 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5645 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
5646 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5648 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
5649 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5651 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
5652 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5654 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
5655 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5657 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
5658 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5660 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
5661 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5663 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
5664 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5666 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form of the same Word1 setup. */
5670 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= pops
;
5671 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5672 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5674 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5675 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5676 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
5678 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5680 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
5682 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
/*
 * assemble_IF: opens an IF construct.
 *
 * pAsm     - assembler state.
 * bHasElse - selects the pop_count written below (see the two
 *            `GL_TRUE != bHasElse` tests).
 *
 * Visible steps: set alu_x_opcode to SQ_CF_INST_ALU_PUSH_BEFORE, call
 * assemble_LOGIC_PRED() with SQ_OP2_INST_PRED_SETNE, call
 * add_cf_instruction() and program the current clause as SQ_CF_INST_JUMP,
 * record that clause as fc_stack[FCSP].first with type FC_IF and no `mid`
 * entry, then call checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE).
 * The JUMP's address field is not written in this function.
 *
 * NOTE(review): this listing omits source lines (braces, returns, `else`,
 * SETfield() value arguments, and any FCSP update before the fc_stack
 * writes) - confirm against the complete file.
 */
5687 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
, GLboolean bHasElse
)
5689 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5691 assemble_LOGIC_PRED(pAsm
, SQ_OP2_INST_PRED_SETNE
);
5694 if(GL_FALSE
== add_cf_instruction(pAsm
) )
/* unAsic == 8: pack the fields into m_Word1.val. */
5699 if(8 == pAsm
->unAsic
)
5701 if(GL_TRUE
!= bHasElse
)
5703 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5705 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
5709 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5711 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
5714 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5716 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
5717 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5719 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
5720 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5722 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
5723 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5725 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
5726 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5728 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
5729 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5731 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
5732 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5734 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
5735 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5737 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form: pop_count is 1 without an ELSE; presumably 0 otherwise
 * (the `else` line is not visible in this listing). */
5741 if(GL_TRUE
!= bHasElse
)
5743 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
5747 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5749 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5750 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5752 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5753 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5754 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_JUMP
;
5755 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5757 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Record the JUMP clause in the flow-control stack entry for this IF. */
5761 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_IF
;
5762 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= NULL
;
5763 pAsm
->fc_stack
[pAsm
->FCSP
].midLen
= 0;
5764 pAsm
->fc_stack
[pAsm
->FCSP
].first
= pAsm
->cf_current_cf_clause_ptr
;
/* USE_CF_FOR_POP_AFTER is #defined near the top of the file, so this
 * #ifndef region is compiled out. */
5766 #ifndef USE_CF_FOR_POP_AFTER
5767 if(GL_TRUE
!= bHasElse
)
5769 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_POP_AFTER
;
5771 #endif /* USE_CF_FOR_POP_AFTER */
5773 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_FALSE
);
/*
 * assemble_ELSE: emits the ELSE of the IF on top of the flow-control stack.
 *
 * Visible steps: call add_cf_instruction(), program the current clause as
 * SQ_CF_INST_ELSE with pop_count 1, (re)allocate fc_stack[FCSP].mid to hold
 * one clause pointer and store the ELSE clause in mid[0], then set the
 * address of the IF's first clause (fc_stack[FCSP].first) to
 * plstCFInstructions_active->uNumOfNode - 1.
 *
 * NOTE(review): this listing omits source lines (braces, returns, `else`,
 * SETfield() value arguments, and one argument of the _mesa_realloc() call)
 * - confirm against the complete file.
 */
5778 GLboolean
assemble_ELSE(r700_AssemblerBase
*pAsm
)
5780 if(GL_FALSE
== add_cf_instruction(pAsm
) )
/* unAsic == 8: pack the fields into m_Word1.val. */
5785 if(8 == pAsm
->unAsic
)
5787 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5789 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
5790 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5792 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
5793 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5795 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
5796 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5798 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
5799 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5801 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
5802 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5804 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
5805 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5807 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
5808 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5810 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
5811 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5813 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form: ELSE, pop_count 1, condition ACTIVE, barrier set. */
5817 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1; ///
5818 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5819 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5821 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5822 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5823 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_ELSE
;
5824 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5826 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Keep a pointer to the ELSE clause in the IF's `mid` array.
 * NOTE(review): the _mesa_realloc() result is stored and mid[0] is written
 * with no visible NULL check - confirm and consider handling failure. */
5829 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc( (void *)pAsm
->fc_stack
[pAsm
->FCSP
].mid
,
5831 sizeof(R700ControlFlowGenericClause
*) );
5832 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[0] = pAsm
->cf_current_cf_clause_ptr
;
5833 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
/* USE_CF_FOR_POP_AFTER is #defined near the top of the file, so this
 * #ifndef region is compiled out. */
5835 #ifndef USE_CF_FOR_POP_AFTER
5836 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_POP_AFTER
;
5837 #endif /* USE_CF_FOR_POP_AFTER */
/* Patch the address of the IF's first clause using the current CF count. */
5839 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
- 1;
/*
 * assemble_ENDIF: closes the IF on top of the flow-control stack.
 *
 * Visible steps: set alu_x_opcode to SQ_CF_INST_ALU; patch the address of
 * either the IF's first clause (no ELSE recorded, mid == NULL) or of mid[0]
 * with plstCFInstructions_active->uNumOfNode; FREE the mid array if present;
 * report an error if the stack entry is not FC_IF; call
 * decreaseCurrent(pAsm, FC_PUSH_VPM).
 *
 * NOTE(review): this listing omits source lines (braces, returns, `else`,
 * the statement inside the #ifdef below, and any FCSP update) - confirm
 * against the complete file.
 * NOTE(review): the FC_IF type check appears after the entry has already
 * been patched and freed, and `mid` is not visibly reset after FREE() -
 * confirm whether validating first would be safer.
 */
5844 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
)
5846 #ifdef USE_CF_FOR_POP_AFTER
5848 #endif /* USE_CF_FOR_POP_AFTER */
5850 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
5852 if(NULL
== pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
5854 /* no else in between */
5855 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
;
/* Presumably the `else` case: an ELSE clause was recorded in mid[0]. */
5859 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[0]->m_Word0
.f
.addr
= pAsm
->pR700Shader
->plstCFInstructions_active
->uNumOfNode
;
5862 if(NULL
!= pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
5864 FREE(pAsm
->fc_stack
[pAsm
->FCSP
].mid
);
5867 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_IF
)
5869 radeon_error("if/endif in shader code are not paired. \n");
5875 decreaseCurrent(pAsm
, FC_PUSH_VPM
);
/*
 * assemble_BGNLOOP: opens a loop.
 *
 * Visible steps: call add_cf_instruction(), program the current clause as
 * LOOP_START_NO_AL (EG_CF_INST_LOOP_START_NO_AL when unAsic == 8,
 * SQ_CF_INST_LOOP_START_NO_AL in the bitfield form) with pop_count 0,
 * initialise fc_stack[FCSP] as an FC_LOOP entry with an empty `mid` list and
 * `first` pointing at this clause, then call
 * checkStackDepth(pAsm, FC_LOOP, GL_FALSE).
 * The clause's address field is not written in this function.
 *
 * NOTE(review): this listing omits source lines (braces, returns, `else`,
 * most SETfield() value arguments, and any FCSP update before the fc_stack
 * writes) - confirm against the complete file.
 */
5880 GLboolean
assemble_BGNLOOP(r700_AssemblerBase
*pAsm
)
5882 if(GL_FALSE
== add_cf_instruction(pAsm
) )
/* unAsic == 8: pack the fields into m_Word1.val. */
5887 if(8 == pAsm
->unAsic
)
5889 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5891 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
5892 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5893 EG_CF_INST_LOOP_START_NO_AL
,
5894 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
5895 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5897 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
5898 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5900 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
5901 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5903 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
5904 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5906 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
5907 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5909 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
5910 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5912 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
5913 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5915 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form: LOOP_START_NO_AL, pop_count 0, barrier set. */
5919 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
5920 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
5921 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
5923 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
5924 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
5925 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_START_NO_AL
;
5926 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
5928 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Initialise the flow-control stack entry for this loop. */
5932 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_LOOP
;
5933 pAsm
->fc_stack
[pAsm
->FCSP
].mid
= NULL
;
5934 pAsm
->fc_stack
[pAsm
->FCSP
].unNumMid
= 0;
5935 pAsm
->fc_stack
[pAsm
->FCSP
].midLen
= 0;
5936 pAsm
->fc_stack
[pAsm
->FCSP
].first
= pAsm
->cf_current_cf_clause_ptr
;
5938 checkStackDepth(pAsm
, FC_LOOP
, GL_FALSE
);
/*
 * assemble_BRK: emits a loop break.  The visible body sits inside
 * #ifdef USE_CF_FOR_CONTINUE_BREAK.
 *
 * Visible steps: set alu_x_opcode to SQ_CF_INST_ALU_PUSH_BEFORE and call
 * assemble_LOGIC_PRED() with SQ_OP2_INST_PRED_SETNE; scan fc_stack from
 * pAsm->FCSP downward for an FC_LOOP entry; add a LOOP_BREAK CF instruction
 * (pop_count 1) and append it to that loop's `mid` array; add a POP CF
 * instruction (pop_count 1) whose address is its own index + 1; call
 * checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE).
 * The LOOP_BREAK's address field is not written here; assemble_ENDLOOP
 * writes the address of every `mid` entry.
 *
 * NOTE(review): this listing omits source lines (braces, returns, `else`,
 * the loop body's statement, the condition guarding the error message, and
 * most SETfield() value arguments) - confirm against the complete file.
 */
5943 GLboolean
assemble_BRK(r700_AssemblerBase
*pAsm
)
5945 #ifdef USE_CF_FOR_CONTINUE_BREAK
5947 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
5949 assemble_LOGIC_PRED(pAsm
, SQ_OP2_INST_PRED_SETNE
);
/* Search for the enclosing loop.  The scan stops while unFCSP > 0, so
 * index 0 is never examined. */
5951 unsigned int unFCSP
;
5952 for(unFCSP
=pAsm
->FCSP
; unFCSP
>0; unFCSP
--)
5954 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
5961 radeon_error("Break is not inside loop/endloop pair.\n");
/* First CF instruction: LOOP_BREAK. */
5965 if(GL_FALSE
== add_cf_instruction(pAsm
) )
5970 if(8 == pAsm
->unAsic
)
5972 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5974 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
5975 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5976 EG_CF_INST_LOOP_BREAK
,
5977 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
5978 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5980 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
5981 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5983 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
5984 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5986 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
5987 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5989 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
5990 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5992 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
5993 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5995 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
5996 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
5998 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
6002 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
6003 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6004 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6006 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6007 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6008 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_BREAK
;
6010 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6012 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Grow the loop's `mid` array by one pointer and append the BREAK clause.
 * NOTE(review): the _mesa_realloc() result is stored and indexed with no
 * visible NULL check - confirm and consider handling failure. */
6015 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
6016 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
6017 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
6018 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
6019 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
6020 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
/* Second CF instruction: POP. */
6022 if(GL_FALSE
== add_cf_instruction(pAsm
) )
6027 if(8 == pAsm
->unAsic
)
6029 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6031 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6032 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6034 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6035 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6037 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6038 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6040 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6041 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6043 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6044 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6046 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6047 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6049 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6050 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6052 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6053 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6055 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
6059 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
6060 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6061 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6063 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6064 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6065 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
6067 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6069 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* The POP's address field is its own index plus one. */
6072 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
6074 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
6076 #endif //USE_CF_FOR_CONTINUE_BREAK
/*
 * assemble_CONT: emits a loop continue.  The visible body sits inside
 * #ifdef USE_CF_FOR_CONTINUE_BREAK and mirrors assemble_BRK with
 * LOOP_CONTINUE in place of LOOP_BREAK.
 *
 * Visible steps: set alu_x_opcode to SQ_CF_INST_ALU_PUSH_BEFORE and call
 * assemble_LOGIC_PRED() with SQ_OP2_INST_PRED_SETNE; scan fc_stack from
 * pAsm->FCSP downward for an FC_LOOP entry; add a LOOP_CONTINUE CF
 * instruction (pop_count 1) and append it to that loop's `mid` array; add a
 * POP CF instruction (pop_count 1) whose address is its own index + 1; call
 * checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE).
 *
 * NOTE(review): this listing omits source lines (braces, returns, `else`,
 * the loop body's statement, the condition guarding the error message, and
 * most SETfield() value arguments) - confirm against the complete file.
 */
6080 GLboolean
assemble_CONT(r700_AssemblerBase
*pAsm
)
6082 #ifdef USE_CF_FOR_CONTINUE_BREAK
6083 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
6085 assemble_LOGIC_PRED(pAsm
, SQ_OP2_INST_PRED_SETNE
);
/* Search for the enclosing loop.  The scan stops while unFCSP > 0, so
 * index 0 is never examined. */
6087 unsigned int unFCSP
;
6088 for(unFCSP
=pAsm
->FCSP
; unFCSP
>0; unFCSP
--)
6090 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
6097 radeon_error("Continue is not inside loop/endloop pair.\n");
/* First CF instruction: LOOP_CONTINUE. */
6101 if(GL_FALSE
== add_cf_instruction(pAsm
) )
6106 if(8 == pAsm
->unAsic
)
6108 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6110 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6111 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6112 EG_CF_INST_LOOP_CONTINUE
,
6113 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6114 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6116 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6117 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6119 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6120 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6122 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6123 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6125 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6126 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6128 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6129 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6131 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6132 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6134 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
6138 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
6139 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6140 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6142 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6143 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6144 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_CONTINUE
;
6146 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6148 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Grow the loop's `mid` array by one pointer and append the CONTINUE clause.
 * NOTE(review): the _mesa_realloc() result is stored and indexed with no
 * visible NULL check - confirm and consider handling failure. */
6151 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
6152 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
6153 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
6154 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
6155 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
6156 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
/* Second CF instruction: POP. */
6158 if(GL_FALSE
== add_cf_instruction(pAsm
) )
6163 if(8 == pAsm
->unAsic
)
6165 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6167 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6168 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6170 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6171 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6173 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6174 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6176 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6177 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6179 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6180 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6182 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6183 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6185 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6186 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6188 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6189 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6191 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
6195 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
6196 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6197 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6199 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6200 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6201 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_POP
;
6203 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6205 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* The POP's address field is its own index plus one. */
6208 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
6210 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
6212 #endif /* USE_CF_FOR_CONTINUE_BREAK */
/*
 * assemble_ENDLOOP: closes the loop on top of the flow-control stack.
 *
 * Visible steps: call add_cf_instruction() and program the current clause
 * as LOOP_END (pop_count 0); point LOOP_END at the loop's first clause
 * index + 1 and point the first clause at LOOP_END's index + 1; under
 * USE_CF_FOR_CONTINUE_BREAK point every recorded `mid` clause at LOOP_END's
 * index and FREE the `mid` array; report an error if the stack entry is not
 * FC_LOOP; if HAS_CURRENT_LOOPRET is set in unCFflags, walk the enclosing
 * fc_stack entries down to CALLSTACK[CALLSP].FCSP_BeforeEntry, calling
 * breakLoopOnFlag() for an enclosing FC_LOOP or returnOnFlag() once the
 * walk reaches FCSP_BeforeEntry; finally call decreaseCurrent(pAsm, FC_LOOP).
 *
 * NOTE(review): this listing omits source lines (braces, returns, `else` /
 * `#else`, break statements, the declarations of `i`, `unFCSP` and `unIF`,
 * the body of the FC_IF case, most SETfield() value arguments, and any FCSP
 * update) - confirm against the complete file.
 * NOTE(review): the FC_LOOP type check appears after the entry has already
 * been patched and freed - confirm whether validating first would be safer.
 */
6217 GLboolean
assemble_ENDLOOP(r700_AssemblerBase
*pAsm
)
6221 if(GL_FALSE
== add_cf_instruction(pAsm
) )
/* unAsic == 8: pack the fields into m_Word1.val. */
6226 if(8 == pAsm
->unAsic
)
6228 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6230 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6231 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6232 EG_CF_INST_LOOP_END
,
6233 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6234 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6236 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6237 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6239 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6240 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6242 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6243 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6245 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6246 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6248 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6249 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6251 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6252 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6254 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form: LOOP_END, pop_count 0, barrier set. */
6258 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
6259 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6260 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6262 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6263 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6264 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_END
;
6265 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6267 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Cross-link the loop: LOOP_END -> first clause index + 1, and the first
 * clause -> LOOP_END index + 1. */
6270 pAsm
->cf_current_cf_clause_ptr
->m_Word0
.f
.addr
= pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_uIndex
+ 1;
6271 pAsm
->fc_stack
[pAsm
->FCSP
].first
->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
+ 1;
/* Point every clause recorded in `mid` at the LOOP_END instruction. */
6273 #ifdef USE_CF_FOR_CONTINUE_BREAK
6274 for(i
=0; i
<pAsm
->fc_stack
[pAsm
->FCSP
].unNumMid
; i
++)
6276 pAsm
->fc_stack
[pAsm
->FCSP
].mid
[i
]->m_Word0
.f
.addr
= pAsm
->cf_current_cf_clause_ptr
->m_uIndex
;
6278 if(NULL
!= pAsm
->fc_stack
[pAsm
->FCSP
].mid
)
6280 FREE(pAsm
->fc_stack
[pAsm
->FCSP
].mid
);
6284 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_LOOP
)
6286 radeon_error("loop/endloop in shader code are not paired. \n");
/* A return was flagged inside this loop: propagate it outwards. */
6292 if((pAsm
->unCFflags
& HAS_CURRENT_LOOPRET
) > 0)
6294 for(unFCSP
=(pAsm
->FCSP
-1); unFCSP
>pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
; unFCSP
--)
6296 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
6298 breakLoopOnFlag(pAsm
, unFCSP
);
6301 else if(FC_IF
== pAsm
->fc_stack
[unFCSP
].type
)
6306 if(unFCSP
<= pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
)
/* Two alternative calls; presumably separated by an `#else` that is not
 * visible in this listing. */
6308 #ifdef USE_CF_FOR_POP_AFTER
6309 returnOnFlag(pAsm
, unIF
);
6311 returnOnFlag(pAsm
, 0);
6312 #endif /* USE_CF_FOR_POP_AFTER */
6313 pAsm
->unCFflags
&= ~HAS_CURRENT_LOOPRET
;
6319 decreaseCurrent(pAsm
, FC_LOOP
);
/*
 * add_return_inst: calls add_cf_instruction() and programs the current CF
 * clause as a RETURN instruction with pop_count 0, condition
 * SQ_CF_COND_ACTIVE and the barrier bit set.  Returns nothing.
 *
 * NOTE(review): this listing omits source lines (braces, the statement
 * taken when add_cf_instruction() fails, `else`, and the SETfield() value
 * arguments) - confirm against the complete file.
 */
6324 void add_return_inst(r700_AssemblerBase
*pAsm
)
6326 if(GL_FALSE
== add_cf_instruction(pAsm
) )
/* unAsic == 8: pack the fields into m_Word1.val. */
6331 if(8 == pAsm
->unAsic
)
6333 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6335 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6336 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6338 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6339 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6341 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6342 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6344 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6345 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6347 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6348 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6350 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6351 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6353 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6354 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6356 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6357 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6359 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form: RETURN, pop_count 0, barrier set. */
6363 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6364 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
6365 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6366 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6368 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6369 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6370 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_RETURN
;
6371 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6373 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/*
 * assemble_BGNSUB: begins assembling a subroutine.
 *
 * pAsm       - assembler state.
 * nILindex   - added to uiIL_Shift to form the subroutine's subIL_Offset.
 * uiIL_Shift - see nILindex.
 *
 * Visible steps: grow pAsm->subs by 10 entries when it is full; initialise
 * subs[unSubArrayPointer] with the IL offset and an empty local CF list;
 * fill CALLSTACK[CALLSP] (sub index, FCSP on entry, local CF list, max and
 * current reset to 0); make the local CF list active via SetActiveCFlist();
 * increment unSubArrayPointer; set alu_x_opcode to SQ_CF_INST_ALU; mark
 * fc_stack[FCSP] as FC_REP and call checkStackDepth(pAsm, FC_REP, GL_FALSE).
 *
 * NOTE(review): this listing omits source lines (braces, returns, and any
 * CALLSP / FCSP updates) - confirm against the complete file.
 */
6377 GLboolean
assemble_BGNSUB(r700_AssemblerBase
*pAsm
, GLint nILindex
, GLuint uiIL_Shift
)
/* Grow the sub-routine table in steps of 10 entries.
 * NOTE(review): the _mesa_realloc() result is assigned straight to
 * pAsm->subs, so the previous array pointer is overwritten if the call
 * returns NULL - confirm and consider a temporary. */
6380 if( (pAsm
->unSubArrayPointer
+ 1) > pAsm
->unSubArraySize
)
6382 pAsm
->subs
= (SUB_OFFSET
*)_mesa_realloc( (void *)pAsm
->subs
,
6383 sizeof(SUB_OFFSET
) * pAsm
->unSubArraySize
,
6384 sizeof(SUB_OFFSET
) * (pAsm
->unSubArraySize
+ 10) );
6385 if(NULL
== pAsm
->subs
)
6389 pAsm
->unSubArraySize
+= 10;
/* Describe the new subroutine: IL offset and an empty local CF list. */
6392 pAsm
->subs
[pAsm
->unSubArrayPointer
].subIL_Offset
= nILindex
+ uiIL_Shift
;
6393 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.pHead
=NULL
;
6394 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.pTail
=NULL
;
6395 pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
.uNumOfNode
=0;
/* Fill the call-stack frame for this subroutine. */
6398 pAsm
->CALLSTACK
[pAsm
->CALLSP
].subDescIndex
= pAsm
->unSubArrayPointer
;
6399 pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
= pAsm
->FCSP
;
6400 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
6401 = &(pAsm
->subs
[pAsm
->unSubArrayPointer
].lstCFInstructions_local
);
6402 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= 0;
6403 pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
= 0;
6404 SetActiveCFlist(pAsm
->pR700Shader
,
6405 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
);
6407 pAsm
->unSubArrayPointer
++;
6410 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
6413 pAsm
->fc_stack
[pAsm
->FCSP
].type
= FC_REP
;
6415 checkStackDepth(pAsm
, FC_REP
, GL_FALSE
);
/*
 * assemble_ENDSUB: finishes assembling a subroutine.
 *
 * Visible steps: report an error if the top flow-control entry is not
 * FC_REP; copy CALLSTACK[CALLSP].max into the subroutine descriptor's
 * unStackDepthMax; call decreaseCurrent(pAsm, FC_REP); make
 * CALLSTACK[CALLSP].plstCFInstructions_local the active CF list; set
 * alu_x_opcode to SQ_CF_INST_ALU.
 *
 * NOTE(review): this listing omits source lines (braces, returns, and any
 * CALLSP / FCSP updates, which would change which CALLSTACK entry the
 * SetActiveCFlist() call reads) - confirm against the complete file.
 */
6420 GLboolean
assemble_ENDSUB(r700_AssemblerBase
*pAsm
)
6422 if(pAsm
->fc_stack
[pAsm
->FCSP
].type
!= FC_REP
)
6424 radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
6428 /* copy max to sub structure */
6429 pAsm
->subs
[pAsm
->CALLSTACK
[pAsm
->CALLSP
].subDescIndex
].unStackDepthMax
6430 = pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
;
6432 decreaseCurrent(pAsm
, FC_REP
);
6435 SetActiveCFlist(pAsm
->pR700Shader
,
6436 pAsm
->CALLSTACK
[pAsm
->CALLSP
].plstCFInstructions_local
);
6438 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
/*
 * assemble_RET: emits a return.
 *
 * Visible steps: when CALLSP > 0, scan fc_stack from pAsm->FCSP down to
 * (but not including) CALLSTACK[CALLSP].FCSP_BeforeEntry; for an FC_LOOP
 * entry call setRetInLoopFlag(pAsm, SQ_SEL_1) and breakLoopOnFlag(), and set
 * LOOPRET_FLAGS in unCFflags; then call add_return_inst().
 *
 * NOTE(review): this listing omits source lines (braces, returns, the
 * declaration of `unFCSP`, the body of the FC_IF case, the statements inside
 * the #ifdef below, and whatever follows the scan) - confirm against the
 * complete file.
 * NOTE(review): the GLboolean results of setRetInLoopFlag() and
 * breakLoopOnFlag() are not checked in the visible calls.
 */
6445 GLboolean
assemble_RET(r700_AssemblerBase
*pAsm
)
6449 if(pAsm
->CALLSP
> 0)
6452 for(unFCSP
=pAsm
->FCSP
; unFCSP
>pAsm
->CALLSTACK
[pAsm
->CALLSP
].FCSP_BeforeEntry
; unFCSP
--)
6454 if(FC_LOOP
== pAsm
->fc_stack
[unFCSP
].type
)
6456 setRetInLoopFlag(pAsm
, SQ_SEL_1
);
6457 breakLoopOnFlag(pAsm
, unFCSP
);
6458 pAsm
->unCFflags
|= LOOPRET_FLAGS
;
6462 else if(FC_IF
== pAsm
->fc_stack
[unFCSP
].type
)
6469 #ifdef USE_CF_FOR_POP_AFTER
6474 #endif /* USE_CF_FOR_POP_AFTER */
6476 add_return_inst(pAsm
);
/*
 * assemble_CAL: emits a subroutine call and, if the callee has not been
 * assembled yet, assembles it.
 *
 * Parameters visible in this listing: pAsm, uiNumberInsts, pILInst and
 * pPresubDesc.  nILindex and uiIL_Shift are used below; their declarations
 * fall on lines not visible here.
 *
 * Visible steps: set alu_x_opcode to SQ_CF_INST_ALU; call
 * add_cf_instruction() and program the clause as CALL (call_count 1,
 * pop_count 0); grow pAsm->callers by 10 entries when full and record this
 * call site (IL offset and CF clause pointer); search pAsm->subs for a
 * subroutine with the same IL offset.  For a match ("compiled before") the
 * caller's stack-depth max is raised if needed and subDescIndex is set to
 * the match.  Otherwise subDescIndex is set to unSubArrayPointer,
 * AssembleInstr() is invoked, the max is updated from the new subroutine
 * and pPresubDesc is stored in its descriptor.
 *
 * NOTE(review): this listing omits source lines (braces, returns, `else`,
 * local declarations, the SETfield() value arguments, and the control flow
 * separating the "found" and "not found" paths) - confirm against the
 * complete file.
 */
6481 GLboolean
assemble_CAL(r700_AssemblerBase
*pAsm
,
6484 GLuint uiNumberInsts
,
6485 struct prog_instruction
*pILInst
,
6486 PRESUB_DESC
* pPresubDesc
)
6490 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
6492 if(GL_FALSE
== add_cf_instruction(pAsm
) )
/* unAsic == 8: pack the fields into m_Word1.val. */
6497 if(8 == pAsm
->unAsic
)
6499 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6501 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6502 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6504 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6505 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6507 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6508 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6510 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6511 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6513 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6514 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6516 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6517 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6519 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6520 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6522 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6523 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6525 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bitfield form: CALL, call_count 1, pop_count 0, barrier set. */
6529 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.call_count
= 1;
6530 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 0;
6531 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6532 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6534 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6535 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6536 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_CALL
;
6537 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6539 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Grow the caller table in steps of 10 entries.
 * NOTE(review): the _mesa_realloc() result is assigned straight to
 * pAsm->callers, so the previous array pointer is overwritten if the call
 * returns NULL - confirm and consider a temporary. */
6543 if( (pAsm
->unCallerArrayPointer
+ 1) > pAsm
->unCallerArraySize
)
6545 pAsm
->callers
= (CALLER_POINTER
*)_mesa_realloc( (void *)pAsm
->callers
,
6546 sizeof(CALLER_POINTER
) * pAsm
->unCallerArraySize
,
6547 sizeof(CALLER_POINTER
) * (pAsm
->unCallerArraySize
+ 10) );
6548 if(NULL
== pAsm
->callers
)
6552 pAsm
->unCallerArraySize
+= 10;
/* Record this call site. */
6555 uiIL_Offset
= nILindex
+ uiIL_Shift
;
6556 pAsm
->callers
[pAsm
->unCallerArrayPointer
].subIL_Offset
= uiIL_Offset
;
6557 pAsm
->callers
[pAsm
->unCallerArrayPointer
].cf_ptr
= pAsm
->cf_current_cf_clause_ptr
;
6559 pAsm
->callers
[pAsm
->unCallerArrayPointer
].finale_cf_ptr
= NULL
;
6560 pAsm
->callers
[pAsm
->unCallerArrayPointer
].prelude_cf_ptr
= NULL
;
6562 pAsm
->unCallerArrayPointer
++;
/* Look for an already-assembled subroutine at the same IL offset. */
6568 for(j
=0; j
<pAsm
->unSubArrayPointer
; j
++)
6570 if(uiIL_Offset
== pAsm
->subs
[j
].subIL_Offset
)
6571 { /* compiled before */
6573 max
= pAsm
->subs
[j
].unStackDepthMax
6574 + pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
6575 if(max
> pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
6577 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= max
;
6580 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].subDescIndex
= j
;
/* Not assembled yet: the callee takes the next sub slot and is assembled
 * now through AssembleInstr(). */
6585 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].subDescIndex
= pAsm
->unSubArrayPointer
;
6586 unSubID
= pAsm
->unSubArrayPointer
;
6588 bRet
= AssembleInstr(nILindex
, uiIL_Shift
, uiNumberInsts
, pILInst
, pAsm
);
6592 max
= pAsm
->subs
[unSubID
].unStackDepthMax
6593 + pAsm
->CALLSTACK
[pAsm
->CALLSP
].current
;
6594 if(max
> pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
)
6596 pAsm
->CALLSTACK
[pAsm
->CALLSP
].max
= max
;
6599 pAsm
->subs
[unSubID
].pPresubDesc
= pPresubDesc
;
/*
 * setRetInLoopFlag: emits an ALU MOV that writes the .x channel of the
 * temporary register pAsm->flag_reg_index.
 *
 * pAsm      - assembler state; the D/D2/S instruction descriptors are
 *             filled in here.
 * flagValue - used as the swizzle selector for all four channels of
 *             source 0 in the second source setup (callers in this file
 *             pass SQ_SEL_0 or SQ_SEL_1).
 *
 * Two source-0 setups are visible: a literal source followed by
 * next_ins_literal(), and a temporary-register source (reg 0) followed by
 * next_ins().
 *
 * NOTE(review): the declaration of fLiteral is commented out, yet
 * next_ins_literal() below references it; presumably the literal variant
 * sits in a disabled preprocessor region whose directives are not visible
 * in this listing - confirm against the complete file.
 * NOTE(review): this listing omits source lines (braces, returns,
 * preprocessor directives).
 */
6605 GLboolean
setRetInLoopFlag(r700_AssemblerBase
*pAsm
, GLuint flagValue
)
6607 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
/* Destination: MOV into flag_reg_index, writing only .x. */
6609 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
6610 pAsm
->D
.dst
.op3
= 0;
6611 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
6612 pAsm
->D
.dst
.reg
= pAsm
->flag_reg_index
;
6613 pAsm
->D
.dst
.writex
= 1;
6614 pAsm
->D
.dst
.writey
= 0;
6615 pAsm
->D
.dst
.writez
= 0;
6616 pAsm
->D
.dst
.writew
= 0;
6617 pAsm
->D2
.dst2
.literal_slots
= 1;
6618 pAsm
->D2
.dst2
.SaturateMode
= SATURATE_OFF
;
6619 pAsm
->D
.dst
.predicated
= 0;
6620 /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
6621 pAsm
->D
.dst
.math
= 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
6622 pAsm
->D2
.dst2
.index_mode
= SQ_INDEX_LOOP
; /* Check this ! */
/* Source 0, literal variant. */
6624 pAsm
->S
[0].src
.rtype
= SRC_REC_LITERAL
;
6625 //pAsm->S[0].src.reg = 0;
6626 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
6627 noneg_PVSSRC(&(pAsm
->S
[0].src
));
6628 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
6629 pAsm
->S
[0].src
.swizzley
= SQ_SEL_Y
;
6630 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_Z
;
6631 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_W
;
6633 if( GL_FALSE
== next_ins_literal(pAsm
, &(fLiteral
[0])) )
/* Source 0, temporary-register variant: every swizzle is flagValue. */
6638 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
6639 pAsm
->S
[0].src
.reg
= 0;
6640 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
6641 noneg_PVSSRC(&(pAsm
->S
[0].src
));
6642 pAsm
->S
[0].src
.swizzlex
= flagValue
;
6643 pAsm
->S
[0].src
.swizzley
= flagValue
;
6644 pAsm
->S
[0].src
.swizzlez
= flagValue
;
6645 pAsm
->S
[0].src
.swizzlew
= flagValue
;
6647 if( GL_FALSE
== next_ins(pAsm
) )
/*
 * testFlag: emits a predicated PRED_SETE ALU instruction that compares the
 * flag register (pAsm->flag_reg_index) with a second source, using
 * SQ_CF_INST_ALU_PUSH_BEFORE as the ALU clause opcode.  The destination is
 * the .x channel of a helper register obtained from gethelpr().  Finishes
 * with checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE).
 *
 * Two source-1 setups are visible: a literal source followed by
 * next_ins_literal(), and a temporary-register source (reg 0, all swizzles
 * SQ_SEL_1) followed by next_ins().
 *
 * NOTE(review): the declaration of fLiteral is commented out, yet
 * next_ins_literal() below references it; presumably the literal variant
 * sits in a disabled preprocessor region whose directives are not visible
 * in this listing - confirm against the complete file.
 * NOTE(review): this listing omits source lines (braces, returns,
 * preprocessor directives).
 */
6656 GLboolean
testFlag(r700_AssemblerBase
*pAsm
)
6658 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
6661 GLuint tmp
= gethelpr(pAsm
);
6662 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU_PUSH_BEFORE
;
/* Destination: predicated PRED_SETE into tmp.x. */
6664 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_PRED_SETE
;
6665 pAsm
->D
.dst
.math
= 1;
6666 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
6667 pAsm
->D
.dst
.reg
= tmp
;
6668 pAsm
->D
.dst
.writex
= 1;
6669 pAsm
->D
.dst
.writey
= 0;
6670 pAsm
->D
.dst
.writez
= 0;
6671 pAsm
->D
.dst
.writew
= 0;
6672 pAsm
->D2
.dst2
.literal_slots
= 1;
6673 pAsm
->D2
.dst2
.SaturateMode
= SATURATE_OFF
;
6674 pAsm
->D
.dst
.predicated
= 1;
6675 pAsm
->D2
.dst2
.index_mode
= SQ_INDEX_LOOP
; /* Check this ! */
/* Source 0: the flag register, identity swizzle. */
6677 pAsm
->S
[0].src
.rtype
= DST_REG_TEMPORARY
;
6678 pAsm
->S
[0].src
.reg
= pAsm
->flag_reg_index
;
6679 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
6680 noneg_PVSSRC(&(pAsm
->S
[0].src
));
6681 pAsm
->S
[0].src
.swizzlex
= SQ_SEL_X
;
6682 pAsm
->S
[0].src
.swizzley
= SQ_SEL_Y
;
6683 pAsm
->S
[0].src
.swizzlez
= SQ_SEL_Z
;
6684 pAsm
->S
[0].src
.swizzlew
= SQ_SEL_W
;
/* Source 1, literal variant. */
6686 pAsm
->S
[1].src
.rtype
= SRC_REC_LITERAL
;
6687 //pAsm->S[1].src.reg = 0;
6688 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
6689 noneg_PVSSRC(&(pAsm
->S
[1].src
));
6690 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_X
;
6691 pAsm
->S
[1].src
.swizzley
= SQ_SEL_Y
;
6692 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_Z
;
6693 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_W
;
6695 if( GL_FALSE
== next_ins_literal(pAsm
, &(fLiteral
[0])) )
/* Source 1, temporary-register variant: every swizzle is SQ_SEL_1. */
6700 pAsm
->S
[1].src
.rtype
= DST_REG_TEMPORARY
;
6701 pAsm
->S
[1].src
.reg
= 0;
6702 setaddrmode_PVSSRC(&(pAsm
->S
[1].src
), ADDR_ABSOLUTE
);
6703 noneg_PVSSRC(&(pAsm
->S
[1].src
));
6704 pAsm
->S
[1].src
.swizzlex
= SQ_SEL_1
;
6705 pAsm
->S
[1].src
.swizzley
= SQ_SEL_1
;
6706 pAsm
->S
[1].src
.swizzlez
= SQ_SEL_1
;
6707 pAsm
->S
[1].src
.swizzlew
= SQ_SEL_1
;
6709 if( GL_FALSE
== next_ins(pAsm
) )
6715 checkStackDepth(pAsm
, FC_PUSH_VPM
, GL_TRUE
);
/*
 * returnOnFlag
 *
 * Visible call sequence: jumpToOffest(pAsm, 1, 4), then
 * setRetInLoopFlag(pAsm, SQ_SEL_0), pops(pAsm, unIF + 1) and finally
 * add_return_inst(pAsm).
 *
 * pAsm - assembler state the instructions are appended to.
 * unIF - count handed to pops() after adding one.
 *
 * NOTE(review): the function's braces, any statement preceding the jump and
 * the return statement are not visible in this listing.
 */
6720 GLboolean
returnOnFlag(r700_AssemblerBase
*pAsm
, GLuint unIF
)
6723 jumpToOffest(pAsm
, 1, 4);
6724 setRetInLoopFlag(pAsm
, SQ_SEL_0
);
6725 pops(pAsm
, unIF
+ 1);
6726 add_return_inst(pAsm
);
/*
 * breakLoopOnFlag
 *
 * Adds a CF instruction with add_cf_instruction() and encodes a loop break
 * into its second word:
 *   - when pAsm->unAsic is 8, through SETfield() on m_Word1.val using the
 *     EG_CF_WORD1__* shift/mask macros (instruction EG_CF_INST_LOOP_BREAK);
 *   - otherwise through the m_Word1.f bit-fields (SQ_CF_INST_LOOP_BREAK,
 *     pop_count 1, cond SQ_CF_COND_ACTIVE, barrier 1, remaining visible
 *     fields 0).
 * The new clause pointer is then appended to pAsm->fc_stack[unFCSP].mid,
 * which is grown by one element with _mesa_realloc(), and unNumMid is
 * incremented.
 *
 * pAsm   - assembler state.
 * unFCSP - index into pAsm->fc_stack of the entry that records the clause.
 *
 * NOTE(review): most SETfield() value arguments, the braces and the return
 * statements are not visible in this listing.
 */
6731 GLboolean
breakLoopOnFlag(r700_AssemblerBase
*pAsm
, GLuint unFCSP
)
6736 if(GL_FALSE
== add_cf_instruction(pAsm
) )
/* unAsic == 8 takes the EG_* encoding path. */
6741 if(8 == pAsm
->unAsic
)
6743 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6745 EG_CF_WORD1__POP_COUNT_shift
, EG_CF_WORD1__POP_COUNT_mask
);
6746 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6747 EG_CF_INST_LOOP_BREAK
,
6748 EG_CF_WORD1__CF_INST_shift
, EG_CF_WORD1__CF_INST_mask
);
6749 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6751 EG_CF_WORD1__CF_CONST_shift
, EG_CF_WORD1__CF_CONST_mask
);
6752 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6754 EG_CF_WORD1__COND_shift
, EG_CF_WORD1__COND_mask
);
6755 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6757 EG_CF_WORD1__EOP_shift
, EG_CF_WORD1__EOP_bit
);
6758 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6760 EG_CF_WORD1__VPM_shift
, EG_CF_WORD1__VPM_bit
);
6761 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6763 EG_CF_WORD1__WQM_shift
, EG_CF_WORD1__WQM_bit
);
6764 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6766 EG_CF_WORD1__BARRIER_shift
, EG_CF_WORD1__BARRIER_bit
);
6767 SETfield(pAsm
->cf_current_cf_clause_ptr
->m_Word1
.val
,
6769 EG_CF_WORD1__COUNT_shift
, EG_CF_WORD1__COUNT_mask
);
/* Bit-field encoding of the same instruction (SQ_* constants). */
6773 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.pop_count
= 1;
6774 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_const
= 0x0;
6775 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cond
= SQ_CF_COND_ACTIVE
;
6777 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
6778 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
6779 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_LOOP_BREAK
;
6780 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
6782 pAsm
->cf_current_cf_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
/* Grow the mid[] array of this flow-control stack entry by one pointer and
 * append the clause just built.
 * NOTE(review): the _mesa_realloc() result is stored straight into mid and
 * indexed on the next statement with no visible NULL check - confirm how an
 * allocation failure is meant to be handled. */
6785 pAsm
->fc_stack
[unFCSP
].mid
= (R700ControlFlowGenericClause
**)_mesa_realloc(
6786 (void *)pAsm
->fc_stack
[unFCSP
].mid
,
6787 sizeof(R700ControlFlowGenericClause
*) * pAsm
->fc_stack
[unFCSP
].unNumMid
,
6788 sizeof(R700ControlFlowGenericClause
*) * (pAsm
->fc_stack
[unFCSP
].unNumMid
+ 1) );
6789 pAsm
->fc_stack
[unFCSP
].mid
[pAsm
->fc_stack
[unFCSP
].unNumMid
] = pAsm
->cf_current_cf_clause_ptr
;
6790 pAsm
->fc_stack
[unFCSP
].unNumMid
++;
/*
 * AssembleInstr
 *
 * Walks the instructions pILInst[uiFirstInst] .. pILInst[uiNumberInsts - 1]
 * (the loop condition is i < uiNumberInsts), records the current index in
 * pR700AsmCode->uiCurInst and dispatches on pILInst[i].Opcode to the
 * matching assemble_* routine. pILInst is also stored in
 * pR700AsmCode->pILInst before the loop.
 *
 * uiFirstInst   - index of the first instruction to assemble.
 * uiNumberInsts - exclusive upper bound of the instruction index.
 * pILInst       - instruction array being assembled.
 * pR700AsmCode  - assembler state.
 *
 * NOTE(review): one parameter line, most case labels, the braces and the
 * return statements are not visible in this listing. uiIL_Shift is used in
 * the assemble_BGNSUB call below but its declaration is not shown.
 */
6797 GLboolean
AssembleInstr(GLuint uiFirstInst
,
6799 GLuint uiNumberInsts
,
6800 struct prog_instruction
*pILInst
,
6801 r700_AssemblerBase
*pR700AsmCode
)
6805 pR700AsmCode
->pILInst
= pILInst
;
6806 for(i
=uiFirstInst
; i
<uiNumberInsts
; i
++)
6808 pR700AsmCode
->uiCurInst
= i
;
/* Only compiled when USE_CF_FOR_CONTINUE_BREAK is not defined (the file
 * defines it to 1 near the top): when the next instruction is OPCODE_BRK,
 * the current instruction's opcode is replaced by another set-on-compare
 * opcode.
 * NOTE(review): pILInst[i+1] is read here with no visible bound check. */
6810 #ifndef USE_CF_FOR_CONTINUE_BREAK
6811 if(OPCODE_BRK
== pILInst
[i
+1].Opcode
)
6813 switch(pILInst
[i
].Opcode
)
6816 pILInst
[i
].Opcode
= OPCODE_SGT
;
6819 pILInst
[i
].Opcode
= OPCODE_SGE
;
6822 pILInst
[i
].Opcode
= OPCODE_SLT
;
6825 pILInst
[i
].Opcode
= OPCODE_SLE
;
6828 pILInst
[i
].Opcode
= OPCODE_SNE
;
6831 pILInst
[i
].Opcode
= OPCODE_SEQ
;
6838 if(pILInst
[i
].CondUpdate
== 1)
6840 /* remember dest register used for cond evaluation */
6841 /* XXX also handle PROGRAM_OUTPUT registers here? */
6842 pR700AsmCode
->last_cond_register
= pILInst
[i
].DstReg
.Index
;
/* Main opcode dispatch. */
6845 switch (pILInst
[i
].Opcode
)
6848 if ( GL_FALSE
== assemble_ABS(pR700AsmCode
) )
6853 if ( GL_FALSE
== assemble_ADD(pR700AsmCode
) )
6858 if ( GL_FALSE
== assemble_ARL(pR700AsmCode
) )
6862 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
6863 //if ( GL_FALSE == assemble_BAD("ARR") )
6868 if ( GL_FALSE
== assemble_CMP(pR700AsmCode
) )
/* Cosine: unAsic == 8 uses the EG_ opcode, the other path the SQ_ opcode. */
6872 if(8 == pR700AsmCode
->unAsic
)
6874 if ( GL_FALSE
== assemble_TRIG(pR700AsmCode
, EG_OP2_INST_COS
) )
6879 if ( GL_FALSE
== assemble_TRIG(pR700AsmCode
, SQ_OP2_INST_COS
) )
6888 if ( GL_FALSE
== assemble_DOT(pR700AsmCode
) )
6893 if ( GL_FALSE
== assemble_DST(pR700AsmCode
) )
6898 if ( GL_FALSE
== assemble_EX2(pR700AsmCode
) )
6902 if ( GL_FALSE
== assemble_EXP(pR700AsmCode
) )
6907 if ( GL_FALSE
== assemble_FLR(pR700AsmCode
) )
6910 //case OP_FLR_INT: ;
6912 // if ( GL_FALSE == assemble_FLR_INT() )
6917 if ( GL_FALSE
== assemble_FRC(pR700AsmCode
) )
6923 if ( GL_FALSE
== assemble_KIL(pR700AsmCode
, SQ_OP2_INST_KILLGT
) )
6927 if ( GL_FALSE
== assemble_LG2(pR700AsmCode
) )
6931 if ( GL_FALSE
== assemble_LIT(pR700AsmCode
) )
6935 if ( GL_FALSE
== assemble_LRP(pR700AsmCode
) )
6939 if ( GL_FALSE
== assemble_LOG(pR700AsmCode
) )
6944 if ( GL_FALSE
== assemble_MAD(pR700AsmCode
) )
6948 if ( GL_FALSE
== assemble_MAX(pR700AsmCode
) )
6952 if ( GL_FALSE
== assemble_MIN(pR700AsmCode
) )
6957 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
6961 if ( GL_FALSE
== assemble_MUL(pR700AsmCode
) )
/* NOTE(review): the destination index below is read through
 * pILInst->DstReg, i.e. element 0 of the array, whereas the surrounding
 * code uses pILInst[i] - confirm whether pILInst[i].DstReg was intended. */
6967 callPreSub(pR700AsmCode
,
6970 pILInst
->DstReg
.Index
+ pR700AsmCode
->starting_temp_register_number
,
6972 radeon_error("noise1: not yet supported shader instruction\n");
6976 radeon_error("noise2: not yet supported shader instruction\n");
6979 radeon_error("noise3: not yet supported shader instruction\n");
6982 radeon_error("noise4: not yet supported shader instruction\n");
6986 if ( GL_FALSE
== assemble_POW(pR700AsmCode
) )
6990 if ( GL_FALSE
== assemble_RCP(pR700AsmCode
) )
6994 if ( GL_FALSE
== assemble_RSQ(pR700AsmCode
) )
/* Sine: same EG_/SQ_ opcode selection as for cosine. */
6998 if(8 == pR700AsmCode
->unAsic
)
7000 if ( GL_FALSE
== assemble_TRIG(pR700AsmCode
, EG_OP2_INST_SIN
) )
7005 if ( GL_FALSE
== assemble_TRIG(pR700AsmCode
, SQ_OP2_INST_SIN
) )
7010 if ( GL_FALSE
== assemble_SCS(pR700AsmCode
) )
7015 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETE
) )
7022 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGT
) )
7029 if ( GL_FALSE
== assemble_SGE(pR700AsmCode
) )
7035 /* NO LT, LE, TODO : use GE => LE, GT => LT : reversing the 2 src order would be simplest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
/* The two source operands are swapped in place, SETGT is assembled, and the
 * saved operands are written back. The restore appears twice; presumably
 * once on the failure path and once after success (braces not visible). */
7038 struct prog_src_register SrcRegSave
[2];
7039 SrcRegSave
[0] = pILInst
[i
].SrcReg
[0];
7040 SrcRegSave
[1] = pILInst
[i
].SrcReg
[1];
7041 pILInst
[i
].SrcReg
[0] = SrcRegSave
[1];
7042 pILInst
[i
].SrcReg
[1] = SrcRegSave
[0];
7043 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGT
) )
7045 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
7046 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
7049 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
7050 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
/* Same operand-swap technique, this time around SETGE. */
7056 struct prog_src_register SrcRegSave
[2];
7057 SrcRegSave
[0] = pILInst
[i
].SrcReg
[0];
7058 SrcRegSave
[1] = pILInst
[i
].SrcReg
[1];
7059 pILInst
[i
].SrcReg
[0] = SrcRegSave
[1];
7060 pILInst
[i
].SrcReg
[1] = SrcRegSave
[0];
7061 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETGE
) )
7063 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
7064 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
7067 pILInst
[i
].SrcReg
[0] = SrcRegSave
[0];
7068 pILInst
[i
].SrcReg
[1] = SrcRegSave
[1];
7073 if ( GL_FALSE
== assemble_LOGIC(pR700AsmCode
, SQ_OP2_INST_SETNE
) )
7080 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
7085 if ( GL_FALSE
== assemble_SSG(pR700AsmCode
) )
7092 if ( GL_FALSE
== assemble_MOV(pR700AsmCode
) )
/* Bounds-checked look-ahead: when the next instruction is not OPCODE_END
 * and IsTex() reports it as a texture opcode, its nDstDep is set to i+1. */
7098 if( (i
+1)<uiNumberInsts
)
7100 if(OPCODE_END
!= pILInst
[i
+1].Opcode
)
7102 if( GL_TRUE
== IsTex(pILInst
[i
+1].Opcode
) )
7104 pR700AsmCode
->pInstDeps
[i
+1].nDstDep
= i
+1; //=1?
7116 if ( GL_FALSE
== assemble_TEX(pR700AsmCode
) )
7121 if ( GL_FALSE
== assemble_math_function(pR700AsmCode
, SQ_OP2_INST_TRUNC
) )
7126 if ( GL_FALSE
== assemble_XPD(pR700AsmCode
) )
/* bHasElse is passed to assemble_IF(); the test below checks whether the
 * instruction at this IF's BranchTarget is an OPCODE_ELSE (the statement in
 * the taken branch is not visible in this listing). */
7132 GLboolean bHasElse
= GL_FALSE
;
7134 if(pILInst
[pILInst
[i
].BranchTarget
].Opcode
== OPCODE_ELSE
)
7139 if ( GL_FALSE
== assemble_IF(pR700AsmCode
, bHasElse
) )
7147 if ( GL_FALSE
== assemble_ELSE(pR700AsmCode
) )
7152 if ( GL_FALSE
== assemble_ENDIF(pR700AsmCode
) )
7156 case OPCODE_BGNLOOP
:
7157 if( GL_FALSE
== assemble_BGNLOOP(pR700AsmCode
) )
7164 if( GL_FALSE
== assemble_BRK(pR700AsmCode
) )
7171 if( GL_FALSE
== assemble_CONT(pR700AsmCode
) )
7177 case OPCODE_ENDLOOP
:
7178 if( GL_FALSE
== assemble_ENDLOOP(pR700AsmCode
) )
7185 if( GL_FALSE
== assemble_BGNSUB(pR700AsmCode
, i
, uiIL_Shift
) )
7192 if( GL_FALSE
== assemble_RET(pR700AsmCode
) )
7199 if( GL_FALSE
== assemble_CAL(pR700AsmCode
,
7200 pILInst
[i
].BranchTarget
,
7210 //case OPCODE_EXPORT:
7211 // if ( GL_FALSE == assemble_EXPORT() )
/* This path returns directly with the result of assemble_ENDSUB(). */
7216 return assemble_ENDSUB(pR700AsmCode
);
7219 //pR700AsmCode->uiCurInst = i;
7220 //This is a reminder that if a later export contains a depth/stencil
7221 //export, we need a mov to re-arrange the DST channel; when using a
7222 //pseudo inst, we will use this end inst to do it.
7226 radeon_error("r600: unknown instruction %d\n", pILInst
[i
].Opcode
);
/*
 * InitShaderProgram
 *
 * Start-of-shader setup on the assembler state:
 *   - unless GENERATE_SHADER_FOR_2D is defined, calls
 *     setRetInLoopFlag(pAsm, SQ_SEL_0);
 *   - for a fragment shader (SPT_FP) with unAsic == 8, calls
 *     EG_add_ps_interp(pAsm);
 *   - sets pAsm->alu_x_opcode to SQ_CF_INST_ALU.
 *
 * NOTE(review): the matching #endif, the braces and the return statement are
 * not visible in this listing.
 */
7234 GLboolean
InitShaderProgram(r700_AssemblerBase
* pAsm
)
7236 #ifndef GENERATE_SHADER_FOR_2D
7237 setRetInLoopFlag(pAsm
, SQ_SEL_0
);
7240 if((SPT_FP
== pAsm
->currentShaderType
) && (8 == pAsm
->unAsic
))
7242 EG_add_ps_interp(pAsm
);
7245 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
/*
 * RelocProgram
 *
 * Post-assembly fix-up pass over the control-flow (CF) instruction lists.
 * Visible steps:
 *   1. Unless GENERATE_SHADER_FOR_2D is defined, and when HAS_LOOPRET is not
 *      set in pAsm->unCFflags, the flag-initialisation ALU instruction is
 *      dropped from an ALU clause of the main list.
 *   2. pR700Shader->uStackSize is derived from CALLSTACK[0].max.
 *   3. For every entry of pAsm->subs: the entry's unCFoffset is recorded,
 *      CF addresses and m_uIndex values are shifted by that offset,
 *      presub-backed entries get their GPR and constant-file selects
 *      rebased, and the subroutine's CF list is appended to the main list.
 *   4. For every entry of pAsm->callers: the call address is set to the
 *      callee's unCFoffset and, for presub callees, the prelude/finale ALU
 *      clauses get their registers rebased.
 *
 * pAsm    - assembler state (main list, subs, callers).
 * pILProg - program whose Parameters->NumParameters seeds the constant
 *           offset when Parameters is non-NULL.
 *
 * NOTE(review): loop headers that walk the lists, several declarations
 * (i, unCFoffset, unRegOffset, uNumSrc), the braces and the return
 * statements are not visible in this listing.
 */
7249 GLboolean
RelocProgram(r700_AssemblerBase
* pAsm
, struct gl_program
* pILProg
)
7253 TypedShaderList
* plstCFmain
;
7254 TypedShaderList
* plstCFsub
;
7256 R700ShaderInstruction
* pInst
;
7257 R700ControlFlowGenericClause
* pCFInst
;
7259 R700ControlFlowALUClause
* pCF_ALU
;
7260 R700ALUInstruction
* pALU
;
7261 GLuint unConstOffset
= 0;
7263 GLuint unMinRegIndex
;
7265 plstCFmain
= pAsm
->CALLSTACK
[0].plstCFInstructions_local
;
7267 #ifndef GENERATE_SHADER_FOR_2D
7268 /* remove flags init if they are not used */
7269 if((pAsm
->unCFflags
& HAS_LOOPRET
) == 0)
7271 R700ControlFlowALUClause
* pCF_ALU
;
7272 pInst
= plstCFmain
->pHead
;
7275 if(SIT_CF_ALU
== pInst
->m_ShaderInstType
)
7277 pCF_ALU
= (R700ControlFlowALUClause
*)pInst
;
/* A clause whose count field is 0 is turned into a NOP. */
7278 if(0 == pCF_ALU
->m_Word1
.f
.count
)
7280 pCF_ALU
->m_Word1
.f
.cf_inst
= SQ_CF_INST_NOP
;
/* Otherwise the clause's first ALU instruction is unlinked, the next one
 * becomes the clause's linked instruction, and count is decremented. */
7284 R700ALUInstruction
* pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
7286 pALU
->m_pLinkedALUClause
= NULL
;
7287 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
7288 pALU
->m_pLinkedALUClause
= pCF_ALU
;
7289 pCF_ALU
->m_pLinkedALUInstruction
= pALU
;
7291 pCF_ALU
->m_Word1
.f
.count
--;
7295 pInst
= pInst
->pNextInst
;
7298 #endif /* GENERATE_SHADER_FOR_2D */
/* Stack size: ceil(max / 4) + 2, computed only when max > 0. */
7300 if(pAsm
->CALLSTACK
[0].max
> 0)
7302 pAsm
->pR700Shader
->uStackSize
= ((pAsm
->CALLSTACK
[0].max
+ 3)>>2) + 2;
7305 if(0 == pAsm
->unSubArrayPointer
)
/* Subroutines are placed after the main list, so the first CF offset is
 * the main list's node count. */
7310 unCFoffset
= plstCFmain
->uNumOfNode
;
7312 if(NULL
!= pILProg
->Parameters
)
7314 unConstOffset
= pILProg
->Parameters
->NumParameters
;
7318 for(i
=0; i
<pAsm
->unSubArrayPointer
; i
++)
7320 pAsm
->subs
[i
].unCFoffset
= unCFoffset
;
7321 plstCFsub
= &(pAsm
->subs
[i
].lstCFInstructions_local
);
7323 pInst
= plstCFsub
->pHead
;
7325 /* reloc instructions */
7328 if(SIT_CF_GENERIC
== pInst
->m_ShaderInstType
)
7330 pCFInst
= (R700ControlFlowGenericClause
*)pInst
;
/* Only the CF opcodes listed here have their addr field shifted. */
7332 switch (pCFInst
->m_Word1
.f
.cf_inst
)
7334 case SQ_CF_INST_POP
:
7335 case SQ_CF_INST_JUMP
:
7336 case SQ_CF_INST_ELSE
:
7337 case SQ_CF_INST_LOOP_END
:
7338 case SQ_CF_INST_LOOP_START
:
7339 case SQ_CF_INST_LOOP_START_NO_AL
:
7340 case SQ_CF_INST_LOOP_CONTINUE
:
7341 case SQ_CF_INST_LOOP_BREAK
:
7342 pCFInst
->m_Word0
.f
.addr
+= unCFoffset
;
7349 pInst
->m_uIndex
+= unCFoffset
;
7351 pInst
= pInst
->pNextInst
;
/* Presub-backed subroutine: rebase its registers and constants. */
7354 if(NULL
!= pAsm
->subs
[i
].pPresubDesc
)
7358 unMinRegIndex
= pAsm
->subs
[i
].pPresubDesc
->pCompiledSub
->MinRegIndex
;
7359 unRegOffset
= pAsm
->subs
[i
].pPresubDesc
->maxStartReg
;
/* NOTE(review): unConstOffset is accumulated with += on every presub
 * iteration instead of being recomputed from the parameter count - confirm
 * this is intended when more than one presub is present. */
7360 unConstOffset
+= pAsm
->subs
[i
].pPresubDesc
->unConstantsStart
;
7362 pInst
= plstCFsub
->pHead
;
7365 if(SIT_CF_ALU
== pInst
->m_ShaderInstType
)
7367 pCF_ALU
= (R700ControlFlowALUClause
*)pInst
;
7369 pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
/* The loop bound is inclusive (j <= count). */
7370 for(int j
=0; j
<=pCF_ALU
->m_Word1
.f
.count
; j
++)
7372 pALU
->m_Word1
.f
.dst_gpr
= pALU
->m_Word1
.f
.dst_gpr
+ unRegOffset
- unMinRegIndex
;
/* Source selects below SQ_ALU_SRC_GPR_SIZE are rebased as registers;
 * selects at or above SQ_ALU_SRC_CFILE_BASE are shifted by the constant
 * offset. */
7374 if(pALU
->m_Word0
.f
.src0_sel
< SQ_ALU_SRC_GPR_SIZE
)
7376 pALU
->m_Word0
.f
.src0_sel
= pALU
->m_Word0
.f
.src0_sel
+ unRegOffset
- unMinRegIndex
;
7378 else if(pALU
->m_Word0
.f
.src0_sel
>= SQ_ALU_SRC_CFILE_BASE
)
7380 pALU
->m_Word0
.f
.src0_sel
+= unConstOffset
;
7383 if( ((pALU
->m_Word1
.val
>> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT
) & 0x0000001F)
7384 >= SQ_OP3_INST_MUL_LIT
)
7385 { /* op3 : 3 srcs */
7386 if(pALU
->m_Word1_OP3
.f
.src2_sel
< SQ_ALU_SRC_GPR_SIZE
)
7388 pALU
->m_Word1_OP3
.f
.src2_sel
= pALU
->m_Word1_OP3
.f
.src2_sel
+ unRegOffset
- unMinRegIndex
;
7390 else if(pALU
->m_Word1_OP3
.f
.src2_sel
>= SQ_ALU_SRC_CFILE_BASE
)
7392 pALU
->m_Word1_OP3
.f
.src2_sel
+= unConstOffset
;
7394 if(pALU
->m_Word0
.f
.src1_sel
< SQ_ALU_SRC_GPR_SIZE
)
7396 pALU
->m_Word0
.f
.src1_sel
= pALU
->m_Word0
.f
.src1_sel
+ unRegOffset
- unMinRegIndex
;
7398 else if(pALU
->m_Word0
.f
.src1_sel
>= SQ_ALU_SRC_CFILE_BASE
)
7400 pALU
->m_Word0
.f
.src1_sel
+= unConstOffset
;
/* For the non-op3 form the operand count is looked up from the opcode;
 * unAsic == 8 uses EG_GetNumOperands(), the other paths r700GetNumOperands()
 * (the conditions choosing between the .f6 and .f layouts are not visible). */
7405 if(8 == pAsm
->unAsic
)
7407 uNumSrc
= EG_GetNumOperands(pALU
->m_Word1_OP2
.f
.alu_inst
, 0);
7413 uNumSrc
= r700GetNumOperands(pALU
->m_Word1_OP2
.f6
.alu_inst
, 0);
7417 uNumSrc
= r700GetNumOperands(pALU
->m_Word1_OP2
.f
.alu_inst
, 0);
7422 if(pALU
->m_Word0
.f
.src1_sel
< SQ_ALU_SRC_GPR_SIZE
)
7424 pALU
->m_Word0
.f
.src1_sel
= pALU
->m_Word0
.f
.src1_sel
+ unRegOffset
- unMinRegIndex
;
7426 else if(pALU
->m_Word0
.f
.src1_sel
>= SQ_ALU_SRC_CFILE_BASE
)
7428 pALU
->m_Word0
.f
.src1_sel
+= unConstOffset
;
7432 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
7435 pInst
= pInst
->pNextInst
;
7439 /* Put sub into main */
7440 plstCFmain
->pTail
->pNextInst
= plstCFsub
->pHead
;
7441 plstCFmain
->pTail
= plstCFsub
->pTail
;
7442 plstCFmain
->uNumOfNode
+= plstCFsub
->uNumOfNode
;
7444 unCFoffset
+= plstCFsub
->uNumOfNode
;
/* Patch every recorded call site with the final offset of its callee. */
7448 for(i
=0; i
<pAsm
->unCallerArrayPointer
; i
++)
7450 pAsm
->callers
[i
].cf_ptr
->m_Word0
.f
.addr
7451 = pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].unCFoffset
;
7453 if(NULL
!= pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].pPresubDesc
)
7455 unMinRegIndex
= pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].pPresubDesc
->pCompiledSub
->MinRegIndex
;
7456 unRegOffset
= pAsm
->subs
[pAsm
->callers
[i
].subDescIndex
].pPresubDesc
->maxStartReg
;
/* Prelude clause: only the destination GPRs are rebased. */
7458 if(NULL
!= pAsm
->callers
[i
].prelude_cf_ptr
)
7460 pCF_ALU
= (R700ControlFlowALUClause
* )(pAsm
->callers
[i
].prelude_cf_ptr
);
7461 pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
7462 for(int j
=0; j
<=pCF_ALU
->m_Word1
.f
.count
; j
++)
7464 pALU
->m_Word1
.f
.dst_gpr
= pALU
->m_Word1
.f
.dst_gpr
+ unRegOffset
- unMinRegIndex
;
7465 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
/* Finale clause: only source 0 is rebased. */
7468 if(NULL
!= pAsm
->callers
[i
].finale_cf_ptr
)
7470 pCF_ALU
= (R700ControlFlowALUClause
* )(pAsm
->callers
[i
].finale_cf_ptr
);
7471 pALU
= pCF_ALU
->m_pLinkedALUInstruction
;
7472 for(int j
=0; j
<=pCF_ALU
->m_Word1
.f
.count
; j
++)
7474 pALU
->m_Word0
.f
.src0_sel
= pALU
->m_Word0
.f
.src0_sel
+ unRegOffset
- unMinRegIndex
;
7475 pALU
= (R700ALUInstruction
*)(pALU
->pNextInst
);
/*
 * callPreSub
 *
 * Emits a call to a pre-compiled subroutine ("presub"):
 *   1. MOVs each of the first uNumValidSrc sources of the current
 *      instruction into pCompiledSub->srcRegIndex[i] and remembers the ALU
 *      clause that holds them (prelude).
 *   2. Looks scriptSigniture up in pAsm->presubs. When absent, a new
 *      PRESUB_DESC is appended (the array grows in steps of 4), the
 *      assembler context is saved, replaced by one sized from
 *      pCompiledSub->NumTemporaries, assemble_CAL() is invoked, and the
 *      context is restored. When present, assemble_CAL() is invoked with the
 *      stored subIL_Shift.
 *   3. MOVs pCompiledSub->dstRegIndex, with the compiled output swizzle, to
 *      the real destination and records the prelude/finale clause pointers
 *      in the most recent pAsm->callers entry.
 *   4. Raises pAsm->number_used_registers and the descriptor's maxStartReg
 *      when the current values require it.
 *
 * pAsm            - assembler state.
 * scriptSigniture - key identifying the presub in pAsm->presubs.
 * pCompiledSub    - compiled subroutine description.
 * uNumValidSrc    - number of sources copied into the presub's inputs.
 *
 * NOTE(review): one parameter line, the declarations of i and bRet, several
 * assemble_CAL() arguments, the braces and the return statements are not
 * visible in this listing.
 */
7484 GLboolean
callPreSub(r700_AssemblerBase
* pAsm
,
7485 LOADABLE_SCRIPT_SIGNITURE scriptSigniture
,
7486 COMPILED_SUB
* pCompiledSub
,
7488 GLshort uNumValidSrc
)
7490 /* save assemble context */
7491 GLuint starting_temp_register_number_save
;
7492 GLuint number_used_registers_save
;
7493 GLuint uFirstHelpReg_save
;
7494 GLuint uHelpReg_save
;
7495 GLuint uiCurInst_save
;
7496 struct prog_instruction
*pILInst_save
;
7497 PRESUB_DESC
* pPresubDesc
;
7501 R700ControlFlowGenericClause
* prelude_cf_ptr
= NULL
;
7503 /* copy srcs to presub inputs */
7504 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
7505 for(i
=0; i
<uNumValidSrc
; i
++)
7507 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
7508 setaddrmode_PVSDST(&(pAsm
->D
.dst
), ADDR_ABSOLUTE
);
7509 pAsm
->D
.dst
.rtype
= DST_REG_TEMPORARY
;
7510 pAsm
->D
.dst
.reg
= pCompiledSub
->srcRegIndex
[i
];
7511 pAsm
->D
.dst
.writex
= 1;
7512 pAsm
->D
.dst
.writey
= 1;
7513 pAsm
->D
.dst
.writez
= 1;
7514 pAsm
->D
.dst
.writew
= 1;
7516 if( GL_FALSE
== assemble_src(pAsm
, i
, 0) )
/* Remember the clause holding the input MOVs; it is stored in the callers
 * entry further down. */
7523 if(uNumValidSrc
> 0)
7525 prelude_cf_ptr
= (R700ControlFlowGenericClause
*) pAsm
->cf_current_alu_clause_ptr
;
7526 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
7529 /* browse through existing presubs. */
7530 for(i
=0; i
<pAsm
->unNumPresub
; i
++)
7532 if(pAsm
->presubs
[i
].sptSigniture
== scriptSigniture
)
7538 if(i
== pAsm
->unNumPresub
)
7539 { /* not loaded yet */
7540 /* save assemble context */
7541 number_used_registers_save
= pAsm
->number_used_registers
;
7542 uFirstHelpReg_save
= pAsm
->uFirstHelpReg
;
7543 uHelpReg_save
= pAsm
->uHelpReg
;
7544 starting_temp_register_number_save
= pAsm
->starting_temp_register_number
;
7545 pILInst_save
= pAsm
->pILInst
;
7546 uiCurInst_save
= pAsm
->uiCurInst
;
7548 /* alloc in presub */
7549 if( (pAsm
->unNumPresub
+ 1) > pAsm
->unPresubArraySize
)
/* NOTE(review): pAsm->presubs is overwritten with the _mesa_realloc()
 * result before the NULL test, so the previous array pointer is lost if the
 * allocation fails - confirm whether that matters to the caller. */
7551 pAsm
->presubs
= (PRESUB_DESC
*)_mesa_realloc( (void *)pAsm
->presubs
,
7552 sizeof(PRESUB_DESC
) * pAsm
->unPresubArraySize
,
7553 sizeof(PRESUB_DESC
) * (pAsm
->unPresubArraySize
+ 4) );
7554 if(NULL
== pAsm
->presubs
)
7556 radeon_error("No memeory to allocate built in shader function description structures. \n");
7559 pAsm
->unPresubArraySize
+= 4;
7562 pPresubDesc
= &(pAsm
->presubs
[i
]);
7563 pPresubDesc
->sptSigniture
= scriptSigniture
;
7565 /* constants offsets need to be final resolved at reloc. */
/* The first presub starts at 0; later ones start right after the previous
 * presub's parameters. */
7566 if(0 == pAsm
->unNumPresub
)
7568 pPresubDesc
->unConstantsStart
= 0;
7572 pPresubDesc
->unConstantsStart
= pAsm
->presubs
[i
-1].unConstantsStart
7573 + pAsm
->presubs
[i
-1].pCompiledSub
->NumParameters
;
7576 pPresubDesc
->pCompiledSub
= pCompiledSub
;
7578 pPresubDesc
->subIL_Shift
= pAsm
->unCurNumILInsts
;
7579 pPresubDesc
->maxStartReg
= uFirstHelpReg_save
;
7580 pAsm
->unCurNumILInsts
+= pCompiledSub
->NumInstructions
;
7582 pAsm
->unNumPresub
++;
7584 /* setup new assemble context */
7585 pAsm
->starting_temp_register_number
= 0;
7586 pAsm
->number_used_registers
= pCompiledSub
->NumTemporaries
;
7587 pAsm
->uFirstHelpReg
= pAsm
->number_used_registers
;
7588 pAsm
->uHelpReg
= pAsm
->uFirstHelpReg
;
7590 bRet
= assemble_CAL(pAsm
,
7592 pPresubDesc
->subIL_Shift
,
7593 pCompiledSub
->NumInstructions
,
7594 pCompiledSub
->Instructions
,
/* Captured before the saved context is put back. */
7598 pPresubDesc
->number_used_registers
= pAsm
->number_used_registers
;
7600 /* restore assemble context */
7601 pAsm
->number_used_registers
= number_used_registers_save
;
7602 pAsm
->uFirstHelpReg
= uFirstHelpReg_save
;
7603 pAsm
->uHelpReg
= uHelpReg_save
;
7604 pAsm
->starting_temp_register_number
= starting_temp_register_number_save
;
7605 pAsm
->pILInst
= pILInst_save
;
7606 pAsm
->uiCurInst
= uiCurInst_save
;
/* Presub already registered: reuse its descriptor. */
7610 pPresubDesc
= &(pAsm
->presubs
[i
]);
7612 bRet
= assemble_CAL(pAsm
,
7614 pPresubDesc
->subIL_Shift
,
7615 pCompiledSub
->NumInstructions
,
7616 pCompiledSub
->Instructions
,
7620 if(GL_FALSE
== bRet
)
7622 radeon_error("Shader presub assemble failed. \n");
7626 /* copy presub output to real dst */
7627 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
7628 pAsm
->D
.dst
.opcode
= SQ_OP2_INST_MOV
;
7630 if( GL_FALSE
== assemble_dst(pAsm
) )
7635 setaddrmode_PVSSRC(&(pAsm
->S
[0].src
), ADDR_ABSOLUTE
);
7636 pAsm
->S
[0].src
.rtype
= SRC_REG_TEMPORARY
;
7637 pAsm
->S
[0].src
.reg
= pCompiledSub
->dstRegIndex
;
7638 pAsm
->S
[0].src
.swizzlex
= pCompiledSub
->outputSwizzleX
;
7639 pAsm
->S
[0].src
.swizzley
= pCompiledSub
->outputSwizzleY
;
7640 pAsm
->S
[0].src
.swizzlez
= pCompiledSub
->outputSwizzleZ
;
7641 pAsm
->S
[0].src
.swizzlew
= pCompiledSub
->outputSwizzleW
;
/* Record both clause pointers in the last callers entry (index
 * unCallerArrayPointer - 1). */
7645 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].finale_cf_ptr
= (R700ControlFlowGenericClause
*) pAsm
->cf_current_alu_clause_ptr
;
7646 pAsm
->callers
[pAsm
->unCallerArrayPointer
- 1].prelude_cf_ptr
= prelude_cf_ptr
;
7647 pAsm
->alu_x_opcode
= SQ_CF_INST_ALU
;
/* Raise the register count when the presub's registers, placed at
 * uFirstHelpReg, exceed the current number. */
7650 if( (pPresubDesc
->number_used_registers
+ pAsm
->uFirstHelpReg
) > pAsm
->number_used_registers
)
7652 pAsm
->number_used_registers
= pPresubDesc
->number_used_registers
+ pAsm
->uFirstHelpReg
;
/* maxStartReg keeps the largest uFirstHelpReg seen for this presub. */
7654 if(pAsm
->uFirstHelpReg
> pPresubDesc
->maxStartReg
)
7656 pPresubDesc
->maxStartReg
= pAsm
->uFirstHelpReg
;
/*
 * Process_Export
 *
 * Allocates an export clause (via check_current_clause) and fills it in:
 *   - Word0: export type, array_base chosen from the type (SQ_CF_PIXEL_Z
 *     for depth, otherwise SQ_CF_PIXEL_MRT0 / SQ_CF_POS_0 / 0 plus
 *     export_starting_index), rw_gpr = starting_register_number,
 *     rw_rel = SQ_ABSOLUTE, index_gpr = 0, elem_size = 3;
 *   - Word1: through SETfield() with EG_* macros when unAsic == 8, otherwise
 *     through bit-fields (burst_count = export_count - 1,
 *     cf_inst = SQ_CF_INST_EXPORT, barrier = 1);
 *   - swizzle: a depth export routes Z into sel_x and masks the other
 *     channels; otherwise the swizzle is the identity xyzw.
 * The clause is stored in pAsm->cf_last_export_ptr.
 *
 * pAsm                     - assembler state.
 * export_starting_index    - added to the array base of the chosen type.
 * export_count             - number of exports; 1 takes the single-export
 *                            swizzle path.
 * starting_register_number - GPR written to rw_gpr.
 * is_depth_export          - GL_TRUE selects the depth array base/swizzle.
 *
 * NOTE(review): the declaration of the `type` parameter, the switch header,
 * some case labels, the SETfield() value arguments, the braces and the
 * return statements are not visible in this listing.
 */
7662 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
7664 GLuint export_starting_index
,
7665 GLuint export_count
,
7666 GLuint starting_register_number
,
7667 GLboolean is_depth_export
)
7669 check_current_clause(pAsm
, CF_EMPTY_CLAUSE
);
7670 check_current_clause(pAsm
, CF_EXPORT_CLAUSE
); //alloc the cf_current_export_clause_ptr
7672 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.type
= type
;
/* array_base depends on the export type. */
7676 case SQ_EXPORT_PIXEL
:
7677 if(GL_TRUE
== is_depth_export
)
7679 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_Z
;
7683 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_PIXEL_MRT0
+ export_starting_index
;
7688 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= SQ_CF_POS_0
+ export_starting_index
;
7691 case SQ_EXPORT_PARAM
:
7692 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.array_base
= 0x0 + export_starting_index
;
7696 radeon_error("Unknown export type: %d\n", type
);
7701 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_gpr
= starting_register_number
;
7703 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.rw_rel
= SQ_ABSOLUTE
;
7704 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.index_gpr
= 0x0;
7705 pAsm
->cf_current_export_clause_ptr
->m_Word0
.f
.elem_size
= 0x3;
/* Word1 encoding: EG_* SETfield path for unAsic == 8, bit-fields otherwise. */
7707 if(8 == pAsm
->unAsic
)
7709 SETfield(pAsm
->cf_current_export_clause_ptr
->m_Word1
.val
,
7711 EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_shift
,
7712 EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_mask
);
7713 SETfield(pAsm
->cf_current_export_clause_ptr
->m_Word1
.val
,
7715 EG_CF_ALLOC_EXPORT_WORD1__EOP_shift
,
7716 EG_CF_ALLOC_EXPORT_WORD1__EOP_bit
);
7717 SETfield(pAsm
->cf_current_export_clause_ptr
->m_Word1
.val
,
7719 EG_CF_ALLOC_EXPORT_WORD1__VPM_shift
,
7720 EG_CF_ALLOC_EXPORT_WORD1__VPM_bit
);
7721 SETfield(pAsm
->cf_current_export_clause_ptr
->m_Word1
.val
,
7723 EG_CF_WORD1__CF_INST_shift
,
7724 EG_CF_WORD1__CF_INST_mask
);
7725 SETfield(pAsm
->cf_current_export_clause_ptr
->m_Word1
.val
,
7727 EG_CF_ALLOC_EXPORT_WORD1__MARK_shift
,
7728 EG_CF_ALLOC_EXPORT_WORD1__MARK_bit
);
7729 SETfield(pAsm
->cf_current_export_clause_ptr
->m_Word1
.val
,
7731 EG_CF_ALLOC_EXPORT_WORD1__BARRIER_shift
,
7732 EG_CF_ALLOC_EXPORT_WORD1__BARRIER_bit
);
7736 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.burst_count
= (export_count
- 1);
7737 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.end_of_program
= 0x0;
7738 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.valid_pixel_mode
= 0x0;
7739 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT
; // _DONE
7740 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.whole_quad_mode
= 0x0;
7741 pAsm
->cf_current_export_clause_ptr
->m_Word1
.f
.barrier
= 0x1;
7744 if (export_count
== 1)
7746 assert(starting_register_number
>= pAsm
->starting_export_register_number
);
7748 /* exports Z as a float into Red channel */
7749 if (GL_TRUE
== is_depth_export
)
7751 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_Z
;
7752 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_MASK
;
7753 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_MASK
;
7754 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_MASK
;
7758 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
7759 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
7760 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
7761 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
7766 // This should only be used if all components for all registers have been written
7767 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_X
;
7768 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_Y
;
7769 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_Z
;
7770 pAsm
->cf_current_export_clause_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_W
;
/* Remembered so callers can later mark the final export. */
7773 pAsm
->cf_last_export_ptr
= pAsm
->cf_current_export_clause_ptr
;
/*
 * Process_Fragment_Exports
 *
 * Emits one pixel export per fragment result whose bit is set in
 * OutputsWritten, using the register recorded in
 * pR700AsmCode->uiFP_OutputMap[i]; FRAG_RESULT_DEPTH is exported as depth.
 * When export_count is still 0 afterwards, a single export of
 * starting_export_register_number is emitted. If a last export exists it is
 * turned into an EXPORT_DONE instruction (EG_* SETfield path for
 * unAsic == 8, bit-fields otherwise) and, on the visible bit-field path,
 * end_of_program is set to 1.
 *
 * pR700AsmCode   - assembler state.
 * OutputsWritten - bit mask of written fragment results.
 *
 * NOTE(review): the declarations of i and unBit, the assignment of unBit
 * inside the loop, the updates of export_count, some SETfield() arguments,
 * the braces and the return statements are not visible in this listing.
 */
7778 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
,
7779 GLbitfield OutputsWritten
)
7782 GLuint export_count
= 0;
7785 for (i
= 0; i
< FRAG_RESULT_MAX
; ++i
)
7789 if (OutputsWritten
& unBit
)
7791 GLboolean is_depth
= i
== FRAG_RESULT_DEPTH
? GL_TRUE
: GL_FALSE
;
7792 if (!Process_Export(pR700AsmCode
, SQ_EXPORT_PIXEL
, 0, 1, pR700AsmCode
->uiFP_OutputMap
[i
], is_depth
))
7798 /* Need to export something, otherwise we'll hang
7799 * results are undefined anyway */
7800 if(export_count
== 0)
7802 Process_Export(pR700AsmCode
, SQ_EXPORT_PIXEL
, 0, 1, pR700AsmCode
->starting_export_register_number
, GL_FALSE
);
/* Mark the final export as EXPORT_DONE. */
7805 if(pR700AsmCode
->cf_last_export_ptr
!= NULL
)
7807 if(8 == pR700AsmCode
->unAsic
)
7809 SETfield(pR700AsmCode
->cf_last_export_ptr
->m_Word1
.val
,
7811 EG_CF_ALLOC_EXPORT_WORD1__EOP_shift
,
7812 EG_CF_ALLOC_EXPORT_WORD1__EOP_bit
);
7813 SETfield(pR700AsmCode
->cf_last_export_ptr
->m_Word1
.val
,
7814 EG_CF_INST_EXPORT_DONE
,
7815 EG_CF_WORD1__CF_INST_shift
,
7816 EG_CF_WORD1__CF_INST_mask
);
7820 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
7821 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
/*
 * Process_Vertex_Exports
 *
 * Emits the vertex shader's exports in two groups, testing one bit of
 * OutputsWritten per output and taking the source register from
 * pR700AsmCode->ucVP_OutputMap[]:
 *   1. VERT_RESULT_HPOS and VERT_RESULT_PSIZ; the last export of this group
 *      is then marked EXPORT_DONE and number_of_exports is written back
 *      from export_count.
 *   2. With export_starting_index reset to 0: COL0, COL1, FOGC, the
 *      VERT_RESULT_TEX0 + i outputs and VERT_RESULT_VAR0 .. VERT_RESULT_MAX-1;
 *      the last export is again marked EXPORT_DONE.
 * A further Process_Export() of starting_export_register_number with the
 * swizzle forced to (0, 0, 0, 1) is visible near the end, and
 * end_of_program is set to 1 on the last export.
 *
 * pR700AsmCode   - assembler state.
 * OutputsWritten - bit mask of written vertex results.
 *
 * NOTE(review): the declarations of unBit and i, the export type and
 * remaining arguments of each Process_Export() call, the conditions guarding
 * the final blocks, the braces and the return statements are not visible in
 * this listing.
 */
7828 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
,
7829 GLbitfield OutputsWritten
)
7834 GLuint export_starting_index
= 0;
7835 GLuint export_count
= pR700AsmCode
->number_of_exports
;
7837 unBit
= 1 << VERT_RESULT_HPOS
;
7838 if(OutputsWritten
& unBit
)
7840 if( GL_FALSE
== Process_Export(pR700AsmCode
,
7842 export_starting_index
,
7844 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_HPOS
],
7849 export_starting_index
++;
7853 unBit
= 1 << VERT_RESULT_PSIZ
;
7854 if(OutputsWritten
& unBit
)
7856 if( GL_FALSE
== Process_Export(pR700AsmCode
,
7858 export_starting_index
,
7860 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_PSIZ
],
/* Close the first group.
 * NOTE(review): cf_last_export_ptr is dereferenced here with no visible NULL
 * check - confirm it is always set when neither HPOS nor PSIZ is written. */
7868 if(8 == pR700AsmCode
->unAsic
)
7870 SETfield(pR700AsmCode
->cf_last_export_ptr
->m_Word1
.val
,
7871 EG_CF_INST_EXPORT_DONE
,
7872 EG_CF_WORD1__CF_INST_shift
,
7873 EG_CF_WORD1__CF_INST_mask
);
7877 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
7881 pR700AsmCode
->number_of_exports
= export_count
;
/* Second group restarts its export index at 0. */
7882 export_starting_index
= 0;
7884 unBit
= 1 << VERT_RESULT_COL0
;
7885 if(OutputsWritten
& unBit
)
7887 if( GL_FALSE
== Process_Export(pR700AsmCode
,
7889 export_starting_index
,
7891 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL0
],
7897 export_starting_index
++;
7900 unBit
= 1 << VERT_RESULT_COL1
;
7901 if(OutputsWritten
& unBit
)
7903 if( GL_FALSE
== Process_Export(pR700AsmCode
,
7905 export_starting_index
,
7907 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_COL1
],
7913 export_starting_index
++;
7916 unBit
= 1 << VERT_RESULT_FOGC
;
7917 if(OutputsWritten
& unBit
)
7919 if( GL_FALSE
== Process_Export(pR700AsmCode
,
7921 export_starting_index
,
7923 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_FOGC
],
7929 export_starting_index
++;
/* Texture-coordinate outputs (the enclosing loop header is not visible). */
7934 unBit
= 1 << (VERT_RESULT_TEX0
+ i
);
7935 if(OutputsWritten
& unBit
)
7937 if( GL_FALSE
== Process_Export(pR700AsmCode
,
7939 export_starting_index
,
7941 pR700AsmCode
->ucVP_OutputMap
[VERT_RESULT_TEX0
+ i
],
7947 export_starting_index
++;
7951 for(i
=VERT_RESULT_VAR0
; i
<VERT_RESULT_MAX
; i
++)
7954 if(OutputsWritten
& unBit
)
7956 if( GL_FALSE
== Process_Export(pR700AsmCode
,
7958 export_starting_index
,
7960 pR700AsmCode
->ucVP_OutputMap
[i
],
7966 export_starting_index
++;
7970 // At least one param should be exported
7973 if(8 == pR700AsmCode
->unAsic
)
7975 SETfield(pR700AsmCode
->cf_last_export_ptr
->m_Word1
.val
,
7976 EG_CF_INST_EXPORT_DONE
,
7977 EG_CF_WORD1__CF_INST_shift
,
7978 EG_CF_WORD1__CF_INST_mask
);
7982 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
/* Export of starting_export_register_number whose swizzle is overridden to
 * the constant vector (0, 0, 0, 1). */
7987 if( GL_FALSE
== Process_Export(pR700AsmCode
,
7991 pR700AsmCode
->starting_export_register_number
,
7997 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_x
= SQ_SEL_0
;
7998 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_y
= SQ_SEL_0
;
7999 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_z
= SQ_SEL_0
;
8000 pR700AsmCode
->cf_last_export_ptr
->m_Word1_SWIZ
.f
.sel_w
= SQ_SEL_1
;
8001 if(8 == pR700AsmCode
->unAsic
)
8003 SETfield(pR700AsmCode
->cf_last_export_ptr
->m_Word1
.val
,
8004 EG_CF_INST_EXPORT_DONE
,
8005 EG_CF_WORD1__CF_INST_shift
,
8006 EG_CF_WORD1__CF_INST_mask
);
8010 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.cf_inst
= SQ_CF_INST_EXPORT_DONE
;
8014 pR700AsmCode
->cf_last_export_ptr
->m_Word1
.f
.end_of_program
= 0x1;
/*
 * Clean_Up_Assembler
 *
 * Releases the assembler's dynamically allocated arrays. Each of pInstDeps,
 * subs, callers and presubs is passed to FREE() when non-NULL and then reset
 * to NULL, so a second call finds nothing left to free.
 *
 * pR700AsmCode - assembler state whose arrays are released.
 *
 * NOTE(review): the braces and the return statement are not visible in this
 * listing.
 */
8019 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
)
8021 if(NULL
!= pR700AsmCode
->pInstDeps
)
8023 FREE(pR700AsmCode
->pInstDeps
);
8024 pR700AsmCode
->pInstDeps
= NULL
;
8027 if(NULL
!= pR700AsmCode
->subs
)
8029 FREE(pR700AsmCode
->subs
);
8030 pR700AsmCode
->subs
= NULL
;
8032 if(NULL
!= pR700AsmCode
->callers
)
8034 FREE(pR700AsmCode
->callers
);
8035 pR700AsmCode
->callers
= NULL
;
8038 if(NULL
!= pR700AsmCode
->presubs
)
8040 FREE(pR700AsmCode
->presubs
);
8041 pR700AsmCode
->presubs
= NULL
;