2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
27 #ifndef _R700_ASSEMBLER_H_
28 #define _R700_ASSEMBLER_H_
30 #include "main/mtypes.h"
31 #include "shader/prog_instruction.h"
33 #include "r700_chip.h"
34 #include "r700_shaderinst.h"
35 #include "r700_shader.h"
/*
 * Signature values identifying a loadable script.
 * NOTE(review): the names suggest these select the GLSL noise1..noise4
 * built-ins; confirm against the code that consumes them.  The spelling
 * "SIGNITURE" is kept because it is part of the public type name.
 */
typedef enum LOADABLE_SCRIPT_SIGNITURE
{
    GLSL_NOISE1 = 0x10000001,
    GLSL_NOISE2 = 0x10000002,
    GLSL_NOISE3 = 0x10000003,
    GLSL_NOISE4 = 0x10000004
} LOADABLE_SCRIPT_SIGNITURE;
45 typedef struct COMPILED_SUB
47 struct prog_instruction
*Instructions
;
48 GLuint NumInstructions
;
49 GLuint NumTemporaries
;
52 GLfloat (*ParameterValues
)[4];
53 GLbyte outputSwizzleX
;
54 GLbyte outputSwizzleY
;
55 GLbyte outputSwizzleZ
;
56 GLbyte outputSwizzleW
;
57 GLshort srcRegIndex
[3];
61 typedef struct PRESUB_DESCtag
63 LOADABLE_SCRIPT_SIGNITURE sptSigniture
;
65 struct prog_src_register InReg
[3];
66 struct prog_dst_register OutReg
;
69 GLushort number_used_registers
;
71 GLuint unConstantsStart
;
73 COMPILED_SUB
* pCompiledSub
;
76 typedef enum SHADER_PIPE_TYPE
82 typedef enum ConstantCycles
85 NUMBER_OF_COMPONENTS
= 4
88 typedef enum HARDWARE_LIMIT_VALUES
90 TEMPORARY_REGISTER_OFFSET
= SQ_ALU_SRC_GPR_BASE
,
91 MAX_TEMPORARY_REGISTERS
= SQ_ALU_SRC_GPR_SIZE
,
92 MAX_CONSTANT_REGISTERS
= SQ_ALU_SRC_CFILE_SIZE
,
93 CFILE_REGISTER_OFFSET
= SQ_ALU_SRC_CFILE_BASE
,
94 NUMBER_OF_INPUT_COLORS
= 2,
95 NUMBER_OF_OUTPUT_COLORS
= 8,
96 NUMBER_OF_TEXTURE_UNITS
= 16,
98 } HARDWARE_LIMIT_VALUES
;
100 typedef enum AddressMode
103 ADDR_RELATIVE_A0
= 1,
104 ADDR_RELATIVE_FLI_0
= 2,
105 NUMBER_OF_ADDR_MOD
= 3
108 typedef enum SrcRegisterType
110 SRC_REG_TEMPORARY
= 0,
112 SRC_REG_CONSTANT
= 2,
113 SRC_REG_ALT_TEMPORARY
= 3,
115 NUMBER_OF_SRC_REG_TYPE
= 5
118 typedef enum DstRegisterType
120 DST_REG_TEMPORARY
= 0,
123 DST_REG_OUT_X_REPL
= 3,
124 DST_REG_ALT_TEMPORARY
= 4,
126 NUMBER_OF_DST_REG_TYPE
= 6
/* Unsigned base type used for the bit-field members declared in this header. */
typedef unsigned int BITS;
131 typedef struct PVSDSTtag
133 BITS opcode
:8; //(:6) //@@@ really should be 10 bits for OP2
135 BITS predicated
:1; //10 //8
136 BITS pred_inv
:1; //11 //8
139 BITS reg
:10; //24 //20
146 BITS op3
:1; // 29 Represents *_OP3_* ALU opcode
148 BITS dualop
:1; // 30 //26
150 BITS addrmode0
:1; //31 //29
151 BITS addrmode1
:1; //32
154 typedef struct PVSINSTtag
156 BITS literal_slots
:2;
157 BITS SaturateMode
:2;
161 typedef struct PVSSRCtag
165 BITS reg
:10; //14 (8)
169 BITS swizzlew
:3; //26
177 BITS addrmode1
:1; //32
180 typedef struct PVSMATHtag
187 BITS dstoff
:2; // 2 bits of dest offset into alt ram
191 BITS dstcomp
:2; // select dest component
195 typedef union PVSDWORDtag
205 typedef struct VAP_OUT_VTX_FMT_0tag
211 BITS pos_param
:1; // 4
226 BITS point_size
:1; // 15
228 BITS rta_index
:1; // shares same channel as kill_flag
230 BITS viewport_index
:1; // 19
232 BITS resvd1
:12; // 20
235 typedef struct VAP_OUT_VTX_FMT_1tag
249 typedef struct VAP_OUT_VTX_FMT_2tag
263 typedef struct OUT_FRAGMENT_FMT_0tag
276 BITS coverage_to_mask
:1;
280 } OUT_FRAGMENT_FMT_0
;
282 typedef enum CF_CLAUSE_TYPE
290 NUMBER_CF_CLAUSE_TYPES
295 MAX_BOOL_CONSTANTS
= 32,
296 MAX_INT_CONSTANTS
= 32,
297 MAX_FLOAT_CONSTANTS
= 256,
311 SAFEDIST_TEX
= 6, ///< safe distance for using result of texture lookup in alu or another tex lookup
312 SAFEDIST_ALU
= 6 ///< the same for alu->fc
315 typedef struct FC_LEVEL
317 R700ControlFlowGenericClause
* first
;
318 R700ControlFlowGenericClause
** mid
;
319 unsigned int unNumMid
;
324 int id
; ///< id of bool or int variable
327 typedef struct VTX_FETCH_METHOD
329 GLboolean bEnableMini
;
330 GLuint mega_fetch_remainder
;
333 typedef struct SUB_OFFSET
337 GLuint unStackDepthMax
;
338 PRESUB_DESC
* pPresubDesc
;
339 TypedShaderList lstCFInstructions_local
;
342 typedef struct CALLER_POINTER
346 R700ControlFlowGenericClause
* cf_ptr
;
348 R700ControlFlowGenericClause
* prelude_cf_ptr
;
349 R700ControlFlowGenericClause
* finale_cf_ptr
;
/* Number of CALL_LEVEL entries in r700_AssemblerBase::CALLSTACK (0x20 == 32). */
#define SQ_MAX_CALL_DEPTH 0x00000020
354 typedef struct CALL_LEVEL
356 unsigned int FCSP_BeforeEntry
;
360 TypedShaderList
* plstCFInstructions_local
;
/*
 * Loop/return flag bits.
 *
 * LOOPRET_FLAGS is the union of both bits.  The replacement list is
 * parenthesized so the macro behaves as a single operand: without the
 * parentheses `x & LOOPRET_FLAGS` parses as `(x & HAS_LOOPRET) |
 * HAS_CURRENT_LOOPRET`, and `~LOOPRET_FLAGS` negates only HAS_LOOPRET.
 */
#define HAS_CURRENT_LOOPRET 0x1L
#define HAS_LOOPRET         0x2L
#define LOOPRET_FLAGS       (HAS_LOOPRET | HAS_CURRENT_LOOPRET)
367 typedef struct r700_AssemblerBase
369 R700ControlFlowSXClause
* cf_last_export_ptr
;
370 R700ControlFlowSXClause
* cf_current_export_clause_ptr
;
371 R700ControlFlowALUClause
* cf_current_alu_clause_ptr
;
372 R700ControlFlowGenericClause
* cf_current_tex_clause_ptr
;
373 R700ControlFlowGenericClause
* cf_current_vtx_clause_ptr
;
374 R700ControlFlowGenericClause
* cf_current_cf_clause_ptr
;
377 R700_Shader
* pR700Shader
;
379 // No clause has been created yet
380 CF_CLAUSE_TYPE cf_current_clause_type
;
384 GLuint number_of_exports
;
385 GLuint number_of_colorandz_exports
;
386 GLuint number_of_export_opcodes
;
393 unsigned int uLastPosUpdate
;
394 unsigned int last_cond_register
;
396 OUT_FRAGMENT_FMT_0 fp_stOutFmt0
;
400 unsigned int number_used_registers
;
401 unsigned int uUsedConsts
;
403 unsigned int flag_reg_index
;
406 unsigned int uiFP_AttributeMap
[FRAG_ATTRIB_MAX
];
407 unsigned int uiFP_OutputMap
[FRAG_RESULT_MAX
];
408 unsigned int uBoolConsts
;
409 unsigned int uIntConsts
;
411 unsigned int uConsts
;
414 unsigned char ucVP_AttributeMap
[VERT_ATTRIB_MAX
];
415 unsigned char ucVP_OutputMap
[VERT_RESULT_MAX
];
417 unsigned char * pucOutMask
;
419 //-----------------------------------------------------------------------------------
420 // flow control members
421 //-----------------------------------------------------------------------------------
423 FC_LEVEL fc_stack
[32];
425 //-----------------------------------------------------------------------------------
426 // ArgSubst used in Assemble_Source() function
427 //-----------------------------------------------------------------------------------
430 GLint hw_gpr
[ NUMBER_OF_CYCLES
][ NUMBER_OF_COMPONENTS
];
431 GLint hw_cfile_addr
[ NUMBER_OF_COMPONENTS
];
432 GLint hw_cfile_chan
[ NUMBER_OF_COMPONENTS
];
436 GLint color_export_register_number
[NUMBER_OF_OUTPUT_COLORS
];
437 GLint depth_export_register_number
;
439 GLint stencil_export_register_number
;
440 GLint coverage_to_mask_export_register_number
;
441 GLint mask_export_register_number
;
443 GLuint starting_export_register_number
;
444 GLuint starting_vfetch_register_number
;
445 GLuint starting_temp_register_number
;
447 GLuint uFirstHelpReg
;
449 GLboolean input_position_is_used
;
450 GLboolean input_normal_is_used
;
452 GLboolean input_color_is_used
[NUMBER_OF_INPUT_COLORS
];
454 GLboolean input_texture_unit_is_used
[NUMBER_OF_TEXTURE_UNITS
];
456 R700VertexGenericFetch
* vfetch_instruction_ptr_array
[VERT_ATTRIB_MAX
];
458 GLuint number_of_inputs
;
462 SHADER_PIPE_TYPE currentShaderType
;
463 struct prog_instruction
* pILInst
;
465 GLubyte SamplerUnits
[MAX_SAMPLERS
];
467 /* helper to decide which type of instruction to assemble */
469 /* we inserted helper instructions and need a barrier on the next TEX instruction */
470 GLboolean need_tex_barrier
;
473 GLuint unSubArraySize
;
474 GLuint unSubArrayPointer
;
475 CALLER_POINTER
* callers
;
476 GLuint unCallerArraySize
;
477 GLuint unCallerArrayPointer
;
479 CALL_LEVEL CALLSTACK
[SQ_MAX_CALL_DEPTH
];
483 PRESUB_DESC
* presubs
;
484 GLuint unPresubArraySize
;
486 GLuint unCurNumILInsts
;
490 GLuint shadow_regs
[R700_MAX_TEXTURE_UNITS
];
492 } r700_AssemblerBase
;
495 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
);
496 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
);
497 void nomask_PVSDST(PVSDST
* pPVSDST
);
498 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
);
499 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
);
500 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
);
501 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
);
502 void swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
);
503 void neg_PVSSRC(PVSSRC
* pPVSSRC
);
504 void noneg_PVSSRC(PVSSRC
* pPVSSRC
);
505 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
);
506 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
);
507 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
);
508 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
);
509 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
) ;
510 GLboolean
is_reduction_opcode(PVSDWORD
* dest
);
511 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
);
513 unsigned int r700GetNumOperands(GLuint opcode
, GLuint nIsOp3
);
515 GLboolean
IsTex(gl_inst_opcode Opcode
);
516 GLboolean
IsAlu(gl_inst_opcode Opcode
);
517 int check_current_clause(r700_AssemblerBase
* pAsm
,
518 CF_CLAUSE_TYPE new_clause_type
);
519 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
520 R700VertexInstruction
* vertex_instruction_ptr
);
521 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
522 R700TextureInstruction
* tex_instruction_ptr
);
523 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
525 GLuint destination_register
,
526 GLuint number_of_elements
,
527 GLenum dataElementType
,
528 VTX_FETCH_METHOD
* pFetchMethod
);
529 GLboolean
assemble_vfetch_instruction2(r700_AssemblerBase
* pAsm
,
530 GLuint destination_register
,
537 VTX_FETCH_METHOD
* pFetchMethod
);
538 GLboolean
cleanup_vfetch_instructions(r700_AssemblerBase
* pAsm
);
539 GLuint
gethelpr(r700_AssemblerBase
* pAsm
);
540 void resethelpr(r700_AssemblerBase
* pAsm
);
541 void checkop_init(r700_AssemblerBase
* pAsm
);
542 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
);
543 GLboolean
checkop1(r700_AssemblerBase
* pAsm
);
544 GLboolean
checkop2(r700_AssemblerBase
* pAsm
);
545 GLboolean
checkop3(r700_AssemblerBase
* pAsm
);
546 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
549 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
);
550 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
);
551 GLboolean
tex_src(r700_AssemblerBase
*pAsm
);
552 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
);
553 void initialize(r700_AssemblerBase
*pAsm
);
554 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
557 BITS scalar_channel_index
);
558 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
559 R700ALUInstruction
* alu_instruction_ptr
,
560 GLuint contiguous_slots_needed
);
562 GLboolean
add_cf_instruction(r700_AssemblerBase
* pAsm
);
563 void add_return_inst(r700_AssemblerBase
*pAsm
);
565 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
571 int is_cfile(BITS sel
);
572 int is_const(BITS sel
);
573 int is_gpr(BITS sel
);
574 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
577 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
);
578 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
);
579 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
);
580 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
581 R700ALUInstruction
* alu_instruction_ptr
);
582 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
583 R700ALUInstruction
* alu_instruction_ptr
);
584 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
);
585 GLboolean
next_ins(r700_AssemblerBase
*pAsm
);
587 GLboolean
pops(r700_AssemblerBase
*pAsm
, GLuint pops
);
588 GLboolean
jumpToOffest(r700_AssemblerBase
*pAsm
, GLuint pops
, GLint offset
);
589 GLboolean
setRetInLoopFlag(r700_AssemblerBase
*pAsm
, GLuint flagValue
);
590 GLboolean
testFlag(r700_AssemblerBase
*pAsm
);
591 GLboolean
breakLoopOnFlag(r700_AssemblerBase
*pAsm
, GLuint unFCSP
);
592 GLboolean
returnOnFlag(r700_AssemblerBase
*pAsm
, GLuint unIF
);
594 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
);
595 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
);
596 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
);
597 GLboolean
assemble_ARL(r700_AssemblerBase
*pAsm
);
598 GLboolean
assemble_BAD(char *opcode_str
);
599 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
);
600 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
);
601 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
);
602 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
);
603 GLboolean
assemble_EXP(r700_AssemblerBase
*pAsm
);
604 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
);
605 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
);
606 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
);
607 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
, GLuint opcode
);
608 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
);
609 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
);
610 GLboolean
assemble_LOG(r700_AssemblerBase
*pAsm
);
611 GLboolean
assemble_MAD(r700_AssemblerBase
*pAsm
);
612 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
);
613 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
);
614 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
);
615 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
);
616 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
);
617 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
);
618 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
);
619 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
);
620 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
);
621 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
);
622 GLboolean
assemble_CONT(r700_AssemblerBase
*pAsm
);
624 GLboolean
assemble_LOGIC(r700_AssemblerBase
*pAsm
, BITS opcode
);
625 GLboolean
assemble_LOGIC_PRED(r700_AssemblerBase
*pAsm
, BITS opcode
);
626 GLboolean
assemble_TRIG(r700_AssemblerBase
*pAsm
, BITS opcode
);
628 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
);
629 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
);
630 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
);
631 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
);
632 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
);
633 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
, GLboolean bHasElse
);
634 GLboolean
assemble_ELSE(r700_AssemblerBase
*pAsm
);
635 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
);
637 GLboolean
assemble_BGNLOOP(r700_AssemblerBase
*pAsm
);
638 GLboolean
assemble_BRK(r700_AssemblerBase
*pAsm
);
639 GLboolean
assemble_COND(r700_AssemblerBase
*pAsm
);
640 GLboolean
assemble_ENDLOOP(r700_AssemblerBase
*pAsm
);
642 GLboolean
assemble_BGNSUB(r700_AssemblerBase
*pAsm
, GLint nILindex
, GLuint uiIL_Shift
);
643 GLboolean
assemble_ENDSUB(r700_AssemblerBase
*pAsm
);
644 GLboolean
assemble_RET(r700_AssemblerBase
*pAsm
);
645 GLboolean
assemble_CAL(r700_AssemblerBase
*pAsm
,
648 GLuint uiNumberInsts
,
649 struct prog_instruction
*pILInst
,
650 PRESUB_DESC
* pPresubDesc
);
652 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
654 GLuint export_starting_index
,
656 GLuint starting_register_number
,
657 GLboolean is_depth_export
);
658 GLboolean
Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase
*pAsm
,
659 BITS depth_channel_select
);
661 GLboolean
callPreSub(r700_AssemblerBase
* pAsm
,
662 LOADABLE_SCRIPT_SIGNITURE scriptSigniture
,
663 /* struct prog_instruction ** pILInstParent, */
664 COMPILED_SUB
* pCompiledSub
,
666 GLshort uNumValidSrc
);
669 GLboolean
AssembleInstr(GLuint uiFirstInst
,
671 GLuint uiNumberInsts
,
672 struct prog_instruction
*pILInst
,
673 r700_AssemblerBase
*pR700AsmCode
);
674 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
, GLbitfield OutputsWritten
);
675 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
, GLbitfield OutputsWritten
);
677 GLboolean
RelocProgram(r700_AssemblerBase
* pAsm
, struct gl_program
* pILProg
);
678 GLboolean
InitShaderProgram(r700_AssemblerBase
* pAsm
);
680 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
);
681 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
);
683 #endif //_R700_ASSEMBLER_H_