2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
27 #ifndef _R700_ASSEMBLER_H_
28 #define _R700_ASSEMBLER_H_
30 #include "main/mtypes.h"
31 #include "shader/prog_instruction.h"
33 #include "r700_chip.h"
34 #include "r700_shaderinst.h"
35 #include "r700_shader.h"
37 typedef enum SHADER_PIPE_TYPE
43 typedef enum ConstantCycles
46 NUMBER_OF_COMPONENTS
= 4
49 typedef enum HARDWARE_LIMIT_VALUES
51 TEMPORARY_REGISTER_OFFSET
= SQ_ALU_SRC_GPR_BASE
,
52 MAX_TEMPORARY_REGISTERS
= SQ_ALU_SRC_GPR_SIZE
,
53 MAX_CONSTANT_REGISTERS
= SQ_ALU_SRC_CFILE_SIZE
,
54 CFILE_REGISTER_OFFSET
= SQ_ALU_SRC_CFILE_BASE
,
55 NUMBER_OF_INPUT_COLORS
= 2,
56 NUMBER_OF_OUTPUT_COLORS
= 8,
57 NUMBER_OF_TEXTURE_UNITS
= 16,
59 } HARDWARE_LIMIT_VALUES
;
61 typedef enum AddressMode
65 ADDR_RELATIVE_FLI_0
= 2,
66 NUMBER_OF_ADDR_MOD
= 3
69 typedef enum SrcRegisterType
71 SRC_REG_TEMPORARY
= 0,
74 SRC_REG_ALT_TEMPORARY
= 3,
76 NUMBER_OF_SRC_REG_TYPE
= 5
79 typedef enum DstRegisterType
81 DST_REG_TEMPORARY
= 0,
84 DST_REG_OUT_X_REPL
= 3,
85 DST_REG_ALT_TEMPORARY
= 4,
87 NUMBER_OF_DST_REG_TYPE
= 6
/*
 * Unsigned integer type used as the base type of the bit-field members
 * declared throughout this header (for example "BITS opcode:8").
 */
typedef unsigned int BITS;
92 typedef struct PVSDSTtag
94 BITS opcode
:8; //(:6) //@@@ really should be 10 bits for OP2
96 BITS predicated
:1; //10 //8
97 BITS pred_inv
:1; //11 //8
100 BITS reg
:10; //24 //20
107 BITS op3
:1; // 29 Represents *_OP3_* ALU opcode
109 BITS dualop
:1; // 30 //26
111 BITS addrmode0
:1; //31 //29
112 BITS addrmode1
:1; //32
115 typedef struct PVSINSTtag
118 BITS SaturateMode
:2;
121 typedef struct PVSSRCtag
125 BITS reg
:10; //15 (8)
129 BITS swizzlew
:3; //27
136 BITS addrmode1
:1; //32
139 typedef struct PVSMATHtag
146 BITS dstoff
:2; // 2 bits of dest offset into alt ram
150 BITS dstcomp
:2; // select dest component
154 typedef union PVSDWORDtag
164 typedef struct VAP_OUT_VTX_FMT_0tag
170 BITS pos_param
:1; // 4
185 BITS point_size
:1; // 15
187 BITS rta_index
:1; // shares same channel as kill_flag
189 BITS viewport_index
:1; // 19
191 BITS resvd1
:12; // 20
194 typedef struct VAP_OUT_VTX_FMT_1tag
208 typedef struct VAP_OUT_VTX_FMT_2tag
222 typedef struct OUT_FRAGMENT_FMT_0tag
235 BITS coverage_to_mask
:1;
239 } OUT_FRAGMENT_FMT_0
;
241 typedef enum CF_CLAUSE_TYPE
249 NUMBER_CF_CLAUSE_TYPES
254 MAX_BOOL_CONSTANTS
= 32,
255 MAX_INT_CONSTANTS
= 32,
256 MAX_FLOAT_CONSTANTS
= 256,
270 SAFEDIST_TEX
= 6, ///< safe distance for using result of texture lookup in alu or another tex lookup
271 SAFEDIST_ALU
= 6 ///< the same for alu->fc
274 typedef struct FC_LEVEL
276 R700ControlFlowGenericClause
* first
;
277 R700ControlFlowGenericClause
** mid
;
278 unsigned int unNumMid
;
283 unsigned int bpush
; ///< 1 if first instruction does branch stack push
284 int id
; ///< id of bool or int variable
287 typedef struct VTX_FETCH_METHOD
289 GLboolean bEnableMini
;
290 GLuint mega_fetch_remainder
;
293 typedef struct SUB_OFFSET
297 TypedShaderList lstCFInstructions_local
;
300 typedef struct CALLER_POINTER
304 R700ControlFlowGenericClause
* cf_ptr
;
/*
 * Number of entries in the CALLSTACK array of r700_AssemblerBase
 * (0x20 == 32 call levels).
 */
#define SQ_MAX_CALL_DEPTH 0x00000020
309 typedef struct STACK_USAGE
316 typedef union STACKDWORDtag
322 typedef struct CALL_LEVEL
324 unsigned int FCSP_BeforeEntry
;
325 STACKDWORD stackUsage
;
326 TypedShaderList
* plstCFInstructions_local
;
/*
 * Single-bit flags.
 * NOTE(review): the names suggest they record RET instructions seen inside
 * loops; the use sites are not visible here -- confirm.
 */
#define HAS_CURRENT_LOOPRET 0x1L
#define HAS_LOOPRET         0x2L

/*
 * Both flags combined.  The replacement list is parenthesised so the macro
 * behaves as one operand: without the parentheses `x & LOOPRET_FLAGS` would
 * parse as `(x & HAS_LOOPRET) | HAS_CURRENT_LOOPRET`, which is always
 * non-zero, and `~LOOPRET_FLAGS` would only complement HAS_LOOPRET.
 */
#define LOOPRET_FLAGS       (HAS_LOOPRET | HAS_CURRENT_LOOPRET)
333 typedef struct r700_AssemblerBase
335 R700ControlFlowSXClause
* cf_last_export_ptr
;
336 R700ControlFlowSXClause
* cf_current_export_clause_ptr
;
337 R700ControlFlowALUClause
* cf_current_alu_clause_ptr
;
338 R700ControlFlowGenericClause
* cf_current_tex_clause_ptr
;
339 R700ControlFlowGenericClause
* cf_current_vtx_clause_ptr
;
340 R700ControlFlowGenericClause
* cf_current_cf_clause_ptr
;
343 R700_Shader
* pR700Shader
;
345 // No clause has been created yet
346 CF_CLAUSE_TYPE cf_current_clause_type
;
350 GLuint number_of_exports
;
351 GLuint number_of_colorandz_exports
;
352 GLuint number_of_export_opcodes
;
358 unsigned int uLastPosUpdate
;
360 OUT_FRAGMENT_FMT_0 fp_stOutFmt0
;
364 unsigned int number_used_registers
;
365 unsigned int uUsedConsts
;
367 unsigned int flag_reg_index
;
370 unsigned int uiFP_AttributeMap
[FRAG_ATTRIB_MAX
];
371 unsigned int uiFP_OutputMap
[FRAG_RESULT_MAX
];
372 unsigned int uBoolConsts
;
373 unsigned int uIntConsts
;
375 unsigned int uConsts
;
378 unsigned char ucVP_AttributeMap
[VERT_ATTRIB_MAX
];
379 unsigned char ucVP_OutputMap
[VERT_RESULT_MAX
];
381 unsigned char * pucOutMask
;
383 //-----------------------------------------------------------------------------------
384 // flow control members
385 //-----------------------------------------------------------------------------------
387 FC_LEVEL fc_stack
[32];
389 unsigned int branch_depth
;
390 unsigned int max_branch_depth
;
392 //-----------------------------------------------------------------------------------
393 // ArgSubst used in Assemble_Source() function
394 //-----------------------------------------------------------------------------------
397 GLint hw_gpr
[ NUMBER_OF_CYCLES
][ NUMBER_OF_COMPONENTS
];
398 GLint hw_cfile_addr
[ NUMBER_OF_COMPONENTS
];
399 GLint hw_cfile_chan
[ NUMBER_OF_COMPONENTS
];
403 GLint color_export_register_number
[NUMBER_OF_OUTPUT_COLORS
];
404 GLint depth_export_register_number
;
406 GLint stencil_export_register_number
;
407 GLint coverage_to_mask_export_register_number
;
408 GLint mask_export_register_number
;
410 GLuint starting_export_register_number
;
411 GLuint starting_vfetch_register_number
;
412 GLuint starting_temp_register_number
;
414 GLuint uFirstHelpReg
;
416 GLboolean input_position_is_used
;
417 GLboolean input_normal_is_used
;
419 GLboolean input_color_is_used
[NUMBER_OF_INPUT_COLORS
];
421 GLboolean input_texture_unit_is_used
[NUMBER_OF_TEXTURE_UNITS
];
423 R700VertexGenericFetch
* vfetch_instruction_ptr_array
[VERT_ATTRIB_MAX
];
425 GLuint number_of_inputs
;
429 SHADER_PIPE_TYPE currentShaderType
;
430 struct prog_instruction
* pILInst
;
433 /* helper to decide which type of instruction to assemble */
/* we inserted helper instructions and need a barrier on the next TEX instruction */
436 GLboolean need_tex_barrier
;
439 GLuint unSubArraySize
;
440 GLuint unSubArrayPointer
;
441 CALLER_POINTER
* callers
;
442 GLuint unCallerArraySize
;
443 GLuint unCallerArrayPointer
;
445 CALL_LEVEL CALLSTACK
[SQ_MAX_CALL_DEPTH
];
449 } r700_AssemblerBase
;
452 inline void checkStackDepth(r700_AssemblerBase
*pAsm
, GLuint uReason
);
453 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
);
454 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
);
455 void nomask_PVSDST(PVSDST
* pPVSDST
);
456 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
);
457 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
);
458 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
);
459 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
);
460 void swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
);
461 void neg_PVSSRC(PVSSRC
* pPVSSRC
);
462 void noneg_PVSSRC(PVSSRC
* pPVSSRC
);
463 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
);
464 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
);
465 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
);
466 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
);
467 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
) ;
468 GLboolean
is_reduction_opcode(PVSDWORD
* dest
);
469 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
);
471 unsigned int r700GetNumOperands(r700_AssemblerBase
* pAsm
);
473 GLboolean
IsTex(gl_inst_opcode Opcode
);
474 GLboolean
IsAlu(gl_inst_opcode Opcode
);
475 int check_current_clause(r700_AssemblerBase
* pAsm
,
476 CF_CLAUSE_TYPE new_clause_type
);
477 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
478 R700VertexInstruction
* vertex_instruction_ptr
);
479 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
480 R700TextureInstruction
* tex_instruction_ptr
);
481 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
483 GLuint destination_register
,
484 GLuint number_of_elements
,
485 GLenum dataElementType
,
486 VTX_FETCH_METHOD
* pFetchMethod
);
487 GLboolean
assemble_vfetch_instruction2(r700_AssemblerBase
* pAsm
,
488 GLuint destination_register
,
494 VTX_FETCH_METHOD
* pFetchMethod
);
495 GLboolean
cleanup_vfetch_instructions(r700_AssemblerBase
* pAsm
);
496 GLuint
gethelpr(r700_AssemblerBase
* pAsm
);
497 void resethelpr(r700_AssemblerBase
* pAsm
);
498 void checkop_init(r700_AssemblerBase
* pAsm
);
499 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
);
500 GLboolean
checkop1(r700_AssemblerBase
* pAsm
);
501 GLboolean
checkop2(r700_AssemblerBase
* pAsm
);
502 GLboolean
checkop3(r700_AssemblerBase
* pAsm
);
503 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
506 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
);
507 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
);
508 GLboolean
tex_src(r700_AssemblerBase
*pAsm
);
509 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
);
510 void initialize(r700_AssemblerBase
*pAsm
);
511 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
514 BITS scalar_channel_index
);
515 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
516 R700ALUInstruction
* alu_instruction_ptr
,
517 GLuint contiguous_slots_needed
);
519 GLboolean
add_cf_instruction(r700_AssemblerBase
* pAsm
);
520 void add_return_inst(r700_AssemblerBase
*pAsm
);
522 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
528 int is_cfile(BITS sel
);
529 int is_const(BITS sel
);
530 int is_gpr(BITS sel
);
531 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
534 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
);
535 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
);
536 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
);
537 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
538 R700ALUInstruction
* alu_instruction_ptr
);
539 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
540 R700ALUInstruction
* alu_instruction_ptr
);
541 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
);
542 GLboolean
next_ins(r700_AssemblerBase
*pAsm
);
544 GLboolean
next_ins2(r700_AssemblerBase
*pAsm
);
545 GLboolean
assemble_alu_instruction2(r700_AssemblerBase
*pAsm
);
547 /* TODO : merge next_ins/2/literal, assemble_alu_instruction/2/literal */
548 GLboolean
next_ins_literal(r700_AssemblerBase
*pAsm
, GLfloat
* pLiteral
);
549 GLboolean
assemble_alu_instruction_literal(r700_AssemblerBase
*pAsm
, GLfloat
* pLiteral
);
551 GLboolean
pops(r700_AssemblerBase
*pAsm
, GLuint pops
);
552 GLboolean
jumpToOffest(r700_AssemblerBase
*pAsm
, GLuint pops
, GLint offset
);
553 GLboolean
setRetInLoopFlag(r700_AssemblerBase
*pAsm
, GLuint flagValue
);
554 GLboolean
testFlag(r700_AssemblerBase
*pAsm
);
555 GLboolean
breakLoopOnFlag(r700_AssemblerBase
*pAsm
, GLuint unFCSP
);
556 GLboolean
returnOnFlag(r700_AssemblerBase
*pAsm
, GLuint unIF
);
558 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
);
559 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
);
560 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
);
561 GLboolean
assemble_ARL(r700_AssemblerBase
*pAsm
);
562 GLboolean
assemble_BAD(char *opcode_str
);
563 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
);
564 GLboolean
assemble_COS(r700_AssemblerBase
*pAsm
);
565 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
);
566 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
);
567 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
);
568 GLboolean
assemble_EXP(r700_AssemblerBase
*pAsm
);
569 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
);
570 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
);
571 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
);
572 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
);
573 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
);
574 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
);
575 GLboolean
assemble_LOG(r700_AssemblerBase
*pAsm
);
576 GLboolean
assemble_MAD(r700_AssemblerBase
*pAsm
);
577 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
);
578 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
);
579 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
);
580 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
);
581 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
);
582 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
);
583 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
);
584 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
);
585 GLboolean
assemble_SIN(r700_AssemblerBase
*pAsm
);
586 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
);
587 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
);
589 GLboolean
assemble_LOGIC(r700_AssemblerBase
*pAsm
, BITS opcode
);
590 GLboolean
assemble_LOGIC_PRED(r700_AssemblerBase
*pAsm
, BITS opcode
);
592 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
);
593 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
);
594 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
);
595 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
);
596 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
);
597 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
, GLboolean bHasElse
);
598 GLboolean
assemble_ELSE(r700_AssemblerBase
*pAsm
);
599 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
);
601 GLboolean
assemble_BGNLOOP(r700_AssemblerBase
*pAsm
);
602 GLboolean
assemble_BRK(r700_AssemblerBase
*pAsm
);
603 GLboolean
assemble_COND(r700_AssemblerBase
*pAsm
);
604 GLboolean
assemble_ENDLOOP(r700_AssemblerBase
*pAsm
);
606 GLboolean
assemble_BGNSUB(r700_AssemblerBase
*pAsm
, GLint nILindex
);
607 GLboolean
assemble_ENDSUB(r700_AssemblerBase
*pAsm
);
608 GLboolean
assemble_RET(r700_AssemblerBase
*pAsm
);
609 GLboolean
assemble_CAL(r700_AssemblerBase
*pAsm
,
611 GLuint uiNumberInsts
,
612 struct prog_instruction
*pILInst
);
614 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
616 GLuint export_starting_index
,
618 GLuint starting_register_number
,
619 GLboolean is_depth_export
);
620 GLboolean
Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase
*pAsm
,
621 BITS depth_channel_select
);
625 GLboolean
AssembleInstr(GLuint uiFirstInst
,
626 GLuint uiNumberInsts
,
627 struct prog_instruction
*pILInst
,
628 r700_AssemblerBase
*pR700AsmCode
);
629 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
, GLbitfield OutputsWritten
);
630 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
, GLbitfield OutputsWritten
);
632 GLboolean
RelocProgram(r700_AssemblerBase
* pAsm
);
633 GLboolean
InitShaderProgram(r700_AssemblerBase
* pAsm
);
635 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
);
636 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
);
638 #endif //_R700_ASSEMBLER_H_