2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
27 #ifndef _R700_ASSEMBLER_H_
28 #define _R700_ASSEMBLER_H_
30 #include "main/mtypes.h"
31 #include "shader/prog_instruction.h"
33 #include "r700_chip.h"
34 #include "r700_shaderinst.h"
35 #include "r700_shader.h"
/*
 * Tag type for the kind of shader being assembled.  It is stored in
 * r700_AssemblerBase.currentShaderType and passed as the first argument of
 * Init_r700_AssemblerBase().
 * NOTE(review): the enumerators and closing brace are not visible in this excerpt.
 */
37 typedef enum SHADER_PIPE_TYPE
/*
 * NUMBER_OF_COMPONENTS is the inner dimension of r700_AssemblerBase.hw_gpr and
 * the length of hw_cfile_addr / hw_cfile_chan.
 * NOTE(review): the braces and any other enumerators of this enum are not
 * visible in this excerpt (NUMBER_OF_CYCLES, used by hw_gpr, is presumably
 * declared here as well -- TODO confirm).
 */
43 typedef enum ConstantCycles
46 NUMBER_OF_COMPONENTS
= 4
/*
 * Register-file limits and fixed resource counts used by the assembler.
 * The first four enumerators alias SQ_ALU_SRC_* constants that are defined
 * outside this header (presumably in r700_chip.h -- TODO confirm).  The
 * NUMBER_OF_* enumerators size arrays inside r700_AssemblerBase.
 */
49 typedef enum HARDWARE_LIMIT_VALUES
51 TEMPORARY_REGISTER_OFFSET
= SQ_ALU_SRC_GPR_BASE
,
52 MAX_TEMPORARY_REGISTERS
= SQ_ALU_SRC_GPR_SIZE
,
53 MAX_CONSTANT_REGISTERS
= SQ_ALU_SRC_CFILE_SIZE
,
54 CFILE_REGISTER_OFFSET
= SQ_ALU_SRC_CFILE_BASE
,
55 NUMBER_OF_INPUT_COLORS
= 2, /* length of r700_AssemblerBase.input_color_is_used[] */
56 NUMBER_OF_OUTPUT_COLORS
= 8, /* length of r700_AssemblerBase.color_export_register_number[] */
57 NUMBER_OF_TEXTURE_UNITS
= 16, /* length of r700_AssemblerBase.input_texture_unit_is_used[] */
59 } HARDWARE_LIMIT_VALUES
;
/*
 * Addressing-mode codes.  Only the last mode (value 2) and the trailing
 * NUMBER_OF_ADDR_MOD enumerator are visible in this excerpt; the latter is
 * one past the highest visible mode value and presumably serves as a count.
 */
61 typedef enum AddressMode
65 ADDR_RELATIVE_FLI_0
= 2,
66 NUMBER_OF_ADDR_MOD
= 3
/*
 * Register-type codes for source operands.
 * NOTE(review): the enumerators with values 1 and 2 and the enum's braces
 * are not visible in this excerpt.
 */
69 typedef enum SrcRegisterType
71 SRC_REG_TEMPORARY
= 0,
74 SRC_REG_ALT_TEMPORARY
= 3,
75 NUMBER_OF_SRC_REG_TYPE
= 4 /* one past the highest visible type value */
/*
 * Register-type codes for destination operands.
 * NOTE(review): the enumerators with values 1, 2 and 5 and the enum's braces
 * are not visible in this excerpt.
 */
78 typedef enum DstRegisterType
80 DST_REG_TEMPORARY
= 0,
83 DST_REG_OUT_X_REPL
= 3,
84 DST_REG_ALT_TEMPORARY
= 4,
86 NUMBER_OF_DST_REG_TYPE
= 6
/*
 * BITS is the underlying type of every bit-field declared in this header
 * (PVSDST, PVSSRC, PVSMATH, the VAP_OUT_VTX_FMT_* and OUT_FRAGMENT_FMT_0
 * structs) and of the opcode / channel / address-mode arguments taken by the
 * helper functions declared further down.  It is a plain unsigned int so
 * that bit-field values never sign-extend.
 */
typedef unsigned int BITS;
/*
 * Packed descriptor for a destination operand, built from BITS bit-fields.
 * addrmode_PVSDST(), setaddrmode_PVSDST() and nomask_PVSDST() declared later
 * in this header operate on it.
 * NOTE(review): several fields and the struct's braces are not visible in
 * this excerpt.  The trailing numbers in the original comments look like
 * running bit offsets -- unverified.
 */
91 typedef struct PVSDSTtag
93 BITS opcode
:8; //(:6) //@@@ really should be 10 bits for OP2
95 BITS predicated
:1; //10 //8
96 BITS pred_inv
:1; //11 //8
99 BITS reg
:10; //24 //20
106 BITS op3
:1; // 29 Represents *_OP3_* ALU opcode
108 BITS dualop
:1; // 30 //26
/* The address mode is held in two separate one-bit fields. */
110 BITS addrmode0
:1; //31 //29
111 BITS addrmode1
:1; //32
/*
 * Packed descriptor for a source operand, built from BITS bit-fields.  The
 * *_PVSSRC() helpers declared later in this header operate on it.
 * NOTE(review): most fields and the struct's braces are not visible in this
 * excerpt; only the register index, the W swizzle selector and the second
 * address-mode bit are shown.
 */
114 typedef struct PVSSRCtag
118 BITS reg
:10; //15 (8)
122 BITS swizzlew
:3; //27
129 BITS addrmode1
:1; //32
/*
 * Packed descriptor used for math instructions.
 * NOTE(review): only two of its bit-fields are visible in this excerpt.
 */
132 typedef struct PVSMATHtag
139 BITS dstoff
:2; // 2 bits of dest offset into alt ram
143 BITS dstcomp
:2; // select dest component
/*
 * Union type taken by pointer in is_reduction_opcode().
 * NOTE(review): its members are not visible in this excerpt.
 */
147 typedef union PVSDWORDtag
/*
 * One-bit flags describing a vertex output format; taken by pointer in
 * is_misc_component_exported().
 * NOTE(review): several fields and the struct's braces are not visible in
 * this excerpt.  The trailing numbers in the original comments look like
 * bit positions -- unverified.
 */
156 typedef struct VAP_OUT_VTX_FMT_0tag
162 BITS pos_param
:1; // 4
177 BITS point_size
:1; // 15
179 BITS rta_index
:1; // shares same channel as kill_flag
181 BITS viewport_index
:1; // 19
183 BITS resvd1
:12; // 20
/* NOTE(review): the members of the next two structs are not visible in this excerpt. */
186 typedef struct VAP_OUT_VTX_FMT_1tag
200 typedef struct VAP_OUT_VTX_FMT_2tag
/*
 * Fragment output format flags.  An instance is embedded in
 * r700_AssemblerBase as fp_stOutFmt0, and is_depth_component_exported()
 * takes a pointer to one.
 * NOTE(review): only the coverage_to_mask flag is visible in this excerpt.
 */
214 typedef struct OUT_FRAGMENT_FMT_0tag
227 BITS coverage_to_mask
:1;
231 } OUT_FRAGMENT_FMT_0
;
/*
 * Kind of control-flow clause.  Used for
 * r700_AssemblerBase.cf_current_clause_type and as the new_clause_type
 * argument of check_current_clause().
 * NOTE(review): only the trailing NUMBER_CF_CLAUSE_TYPES enumerator is
 * visible in this excerpt.
 */
233 typedef enum CF_CLAUSE_TYPE
241 NUMBER_CF_CLAUSE_TYPES
/*
 * Constant-count limits and instruction "safe distance" values.
 * NOTE(review): the enum declaration(s) enclosing these enumerators are not
 * visible in this excerpt.
 */
246 MAX_BOOL_CONSTANTS
= 32,
247 MAX_INT_CONSTANTS
= 32,
248 MAX_FLOAT_CONSTANTS
= 256,
260 SAFEDIST_TEX
= 6, ///< safe distance for using result of texture lookup in alu or another tex lookup
261 SAFEDIST_ALU
= 6 ///< the same for alu->fc
/*
 * Bookkeeping for one flow-control nesting level.  r700_AssemblerBase keeps
 * a fixed array of 32 of these in fc_stack.
 * NOTE(review): some members and the struct's braces are not visible in
 * this excerpt.
 */
264 typedef struct FC_LEVEL
266 unsigned int first
; ///< first fc instruction on level (if, rep, loop)
267 unsigned int* mid
; ///< middle instructions - else or all breaks on this level
272 unsigned int bpush
; ///< 1 if first instruction does branch stack push
273 int id
; ///< id of bool or int variable
/*
 * Vertex-fetch options.  A pointer to this struct is the last parameter of
 * assemble_vfetch_instruction() and assemble_vfetch_instruction2().
 * NOTE(review): the struct's braces are not visible in this excerpt.
 */
276 typedef struct VTX_FETCH_METHOD
278 GLboolean bEnableMini
;
279 GLuint mega_fetch_remainder
;
/*
 * Working state of the R700 shader assembler.  Nearly every function
 * declared in this header takes a pointer to this struct as its first
 * argument.
 * NOTE(review): the opening brace and a number of members are not visible
 * in this excerpt.
 */
282 typedef struct r700_AssemblerBase
/* Pointers to control-flow clause objects, one per clause kind. */
284 R700ControlFlowSXClause
* cf_last_export_ptr
;
285 R700ControlFlowSXClause
* cf_current_export_clause_ptr
;
286 R700ControlFlowALUClause
* cf_current_alu_clause_ptr
;
287 R700ControlFlowGenericClause
* cf_current_tex_clause_ptr
;
288 R700ControlFlowGenericClause
* cf_current_vtx_clause_ptr
;
289 R700ControlFlowGenericClause
* cf_current_cf_clause_ptr
;
/* Shader object being worked on (presumably the one passed to Init_r700_AssemblerBase() -- TODO confirm). */
292 R700_Shader
* pR700Shader
;
294 // No clause has been created yet
295 CF_CLAUSE_TYPE cf_current_clause_type
;
/* Export counters. */
297 GLuint number_of_exports
;
298 GLuint number_of_colorandz_exports
;
299 GLuint number_of_export_opcodes
;
304 unsigned int uLastPosUpdate
;
/* Fragment-program output format flags. */
306 OUT_FRAGMENT_FMT_0 fp_stOutFmt0
;
310 unsigned int number_used_registers
;
311 unsigned int uUsedConsts
;
/* Fragment-program maps, sized by Mesa's FRAG_ATTRIB_MAX / FRAG_RESULT_MAX. */
314 unsigned int uiFP_AttributeMap
[FRAG_ATTRIB_MAX
];
315 unsigned int uiFP_OutputMap
[FRAG_RESULT_MAX
];
316 unsigned int uBoolConsts
;
317 unsigned int uIntConsts
;
319 unsigned int uConsts
;
/* Vertex-program maps, sized by Mesa's VERT_ATTRIB_MAX / VERT_RESULT_MAX. */
322 unsigned char ucVP_AttributeMap
[VERT_ATTRIB_MAX
];
323 unsigned char ucVP_OutputMap
[VERT_RESULT_MAX
];
325 unsigned char * pucOutMask
;
327 //-----------------------------------------------------------------------------------
328 // flow control members
329 //-----------------------------------------------------------------------------------
/* Fixed-capacity array of 32 flow-control levels. */
331 FC_LEVEL fc_stack
[32];
333 unsigned int branch_depth
;
334 unsigned int max_branch_depth
;
336 //-----------------------------------------------------------------------------------
337 // ArgSubst used in Assemble_Source() function
338 //-----------------------------------------------------------------------------------
/* Per-cycle, per-component GPR table and per-component constant-file tables. */
341 GLint hw_gpr
[ NUMBER_OF_CYCLES
][ NUMBER_OF_COMPONENTS
];
342 GLint hw_cfile_addr
[ NUMBER_OF_COMPONENTS
];
343 GLint hw_cfile_chan
[ NUMBER_OF_COMPONENTS
];
/* Register numbers used for each kind of export; one slot per output colour. */
347 GLint color_export_register_number
[NUMBER_OF_OUTPUT_COLORS
];
348 GLint depth_export_register_number
;
350 GLint stencil_export_register_number
;
351 GLint coverage_to_mask_export_register_number
;
352 GLint mask_export_register_number
;
/* First register number of each register range. */
354 GLuint starting_export_register_number
;
355 GLuint starting_vfetch_register_number
;
356 GLuint starting_temp_register_number
;
/* Presumably the first register handed out by gethelpr() -- TODO confirm. */
358 GLuint uFirstHelpReg
;
/* Flags recording which shader inputs are used. */
360 GLboolean input_position_is_used
;
361 GLboolean input_normal_is_used
;
363 GLboolean input_color_is_used
[NUMBER_OF_INPUT_COLORS
];
365 GLboolean input_texture_unit_is_used
[NUMBER_OF_TEXTURE_UNITS
];
/* One vertex-fetch instruction pointer per vertex attribute slot. */
367 R700VertexGenericFetch
* vfetch_instruction_ptr_array
[VERT_ATTRIB_MAX
];
369 GLuint number_of_inputs
;
373 SHADER_PIPE_TYPE currentShaderType
;
/* Mesa IR instruction pointer (presumably the instruction currently being assembled -- TODO confirm). */
374 struct prog_instruction
* pILInst
;
377 /* helper to decide which type of instruction to assemble */
/* NOTE(review): the member the comment above describes is not visible in this excerpt. */
379 /* we inserted helper instructions and need a barrier on the next TEX instruction */
380 GLboolean need_tex_barrier
;
381 } r700_AssemblerBase
;
/*
 * Helpers that read or modify the packed PVSDST / PVSSRC operand
 * descriptors in place through the pointer they are given, followed by
 * three predicates over the output-format and PVSDWORD types.  Only the
 * prototypes are visible here; behaviour is defined in the implementation
 * file.
 */

/* Destination descriptor: address-mode getter / setter and mask reset. */
384 BITS
addrmode_PVSDST(PVSDST
* pPVSDST
);
385 void setaddrmode_PVSDST(PVSDST
* pPVSDST
, BITS addrmode
);
386 void nomask_PVSDST(PVSDST
* pPVSDST
);
/* Source descriptor: address-mode getter / setter. */
387 BITS
addrmode_PVSSRC(PVSSRC
* pPVSSRC
);
388 void setaddrmode_PVSSRC(PVSSRC
* pPVSSRC
, BITS addrmode
);
/* Source descriptor: swizzle helpers (swizzleagain takes one selector per component x, y, z, w). */
389 void setswizzle_PVSSRC(PVSSRC
* pPVSSRC
, BITS swz
);
390 void noswizzle_PVSSRC(PVSSRC
* pPVSSRC
);
391 void swizzleagain_PVSSRC(PVSSRC
* pPVSSRC
, BITS x
, BITS y
, BITS z
, BITS w
);
/* Source descriptor: negate helpers. */
392 void neg_PVSSRC(PVSSRC
* pPVSSRC
);
393 void noneg_PVSSRC(PVSSRC
* pPVSSRC
);
394 void flipneg_PVSSRC(PVSSRC
* pPVSSRC
);
/* Source descriptor: per-component helpers; c selects the component (presumably 0..3 -- TODO confirm). */
395 void zerocomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
);
396 void onecomp_PVSSRC(PVSSRC
* pPVSSRC
, int c
);
/* Predicates over the output formats and over a PVSDWORD. */
397 BITS
is_misc_component_exported(VAP_OUT_VTX_FMT_0
* pOutVTXFmt0
);
398 BITS
is_depth_component_exported(OUT_FRAGMENT_FMT_0
* pFPOutFmt
) ;
399 GLboolean
is_reduction_opcode(PVSDWORD
* dest
);
/*
 * Format and opcode queries.  Only the prototypes are visible here.
 */

/* Takes a GL data type and channel count; pClient_size is an output pointer (presumably receives a size -- TODO confirm). */
400 GLuint
GetSurfaceFormat(GLenum eType
, GLuint nChannels
, GLuint
* pClient_size
);
/* Operand-count query on the assembler state (presumably for the current instruction -- TODO confirm). */
402 unsigned int r700GetNumOperands(r700_AssemblerBase
* pAsm
);
/* Classify a Mesa IR opcode. */
404 GLboolean
IsTex(gl_inst_opcode Opcode
);
405 GLboolean
IsAlu(gl_inst_opcode Opcode
);
/*
 * Clause management and vertex-fetch / texture instruction insertion.
 * Only the prototypes are visible here; the meaning of the int / GLboolean
 * return values is defined by the implementation.
 */
406 int check_current_clause(r700_AssemblerBase
* pAsm
,
407 CF_CLAUSE_TYPE new_clause_type
);
408 GLboolean
add_vfetch_instruction(r700_AssemblerBase
* pAsm
,
409 R700VertexInstruction
* vertex_instruction_ptr
);
410 GLboolean
add_tex_instruction(r700_AssemblerBase
* pAsm
,
411 R700TextureInstruction
* tex_instruction_ptr
);
/* NOTE(review): the parameter between pAsm and destination_register is not visible in this excerpt. */
412 GLboolean
assemble_vfetch_instruction(r700_AssemblerBase
* pAsm
,
414 GLuint destination_register
,
415 GLuint number_of_elements
,
416 GLenum dataElementType
,
417 VTX_FETCH_METHOD
* pFetchMethod
);
/* NOTE(review): the parameters between destination_register and pFetchMethod are not visible in this excerpt. */
418 GLboolean
assemble_vfetch_instruction2(r700_AssemblerBase
* pAsm
,
419 GLuint destination_register
,
425 VTX_FETCH_METHOD
* pFetchMethod
);
426 GLboolean
cleanup_vfetch_instructions(r700_AssemblerBase
* pAsm
);
/*
 * Helper-register management, operand checking and operand assembly.
 * Only the prototypes are visible here.
 */

/* Helper-register getter / reset (presumably related to r700_AssemblerBase.uFirstHelpReg -- TODO confirm). */
427 GLuint
gethelpr(r700_AssemblerBase
* pAsm
);
428 void resethelpr(r700_AssemblerBase
* pAsm
);
/* Operand checks; the numeric suffix presumably is the operand count -- TODO confirm. */
429 void checkop_init(r700_AssemblerBase
* pAsm
);
430 GLboolean
mov_temp(r700_AssemblerBase
* pAsm
, int src
);
431 GLboolean
checkop1(r700_AssemblerBase
* pAsm
);
432 GLboolean
checkop2(r700_AssemblerBase
* pAsm
);
433 GLboolean
checkop3(r700_AssemblerBase
* pAsm
);
/* NOTE(review): the remaining parameters of assemble_src() are not visible in this excerpt. */
434 GLboolean
assemble_src(r700_AssemblerBase
*pAsm
,
437 GLboolean
assemble_dst(r700_AssemblerBase
*pAsm
);
/* Texture-instruction operand setup and assembly. */
438 GLboolean
tex_dst(r700_AssemblerBase
*pAsm
);
439 GLboolean
tex_src(r700_AssemblerBase
*pAsm
);
440 GLboolean
assemble_tex_instruction(r700_AssemblerBase
*pAsm
, GLboolean normalized
);
441 void initialize(r700_AssemblerBase
*pAsm
);
/*
 * ALU instruction assembly, source-select classification and register /
 * constant-file reservation.  Only the prototypes are visible here.
 */

/* NOTE(review): the parameters between alu_instruction_ptr and scalar_channel_index are not visible in this excerpt. */
442 GLboolean
assemble_alu_src(R700ALUInstruction
* alu_instruction_ptr
,
445 BITS scalar_channel_index
);
446 GLboolean
add_alu_instruction(r700_AssemblerBase
* pAsm
,
447 R700ALUInstruction
* alu_instruction_ptr
,
448 GLuint contiguous_slots_needed
);
/* NOTE(review): the remaining parameters of get_src_properties() are not visible in this excerpt. */
449 void get_src_properties(R700ALUInstruction
* alu_instruction_ptr
,
/* Classify a source-select value (constant file, constant, or GPR, judging by the names). */
455 int is_cfile(BITS sel
);
456 int is_const(BITS sel
);
457 int is_gpr(BITS sel
);
/* NOTE(review): the remaining parameters of reserve_cfile() are not visible in this excerpt. */
458 GLboolean
reserve_cfile(r700_AssemblerBase
* pAsm
,
461 GLboolean
reserve_gpr(r700_AssemblerBase
* pAsm
, GLuint sel
, GLuint chan
, GLuint cycle
);
/* Bank-swizzle helpers: pCycle is an output pointer. */
462 GLboolean
cycle_for_scalar_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
);
463 GLboolean
cycle_for_vector_bank_swizzle(const int swiz
, const int sel
, GLuint
* pCycle
);
/* Scalar / vector checks on an ALU instruction. */
464 GLboolean
check_scalar(r700_AssemblerBase
* pAsm
,
465 R700ALUInstruction
* alu_instruction_ptr
);
466 GLboolean
check_vector(r700_AssemblerBase
* pAsm
,
467 R700ALUInstruction
* alu_instruction_ptr
);
468 GLboolean
assemble_alu_instruction(r700_AssemblerBase
*pAsm
);
469 GLboolean
next_ins(r700_AssemblerBase
*pAsm
);
/*
 * Per-opcode assembly routines.  Every prototype below returns GLboolean
 * and takes the assembler state, with two exceptions: assemble_math_function()
 * additionally takes an opcode, and assemble_BAD() takes only an opcode
 * name string.  The suffix of each name matches an instruction mnemonic.
 * Only the prototypes are visible here.
 */
470 GLboolean
assemble_math_function(r700_AssemblerBase
* pAsm
, BITS opcode
);
471 GLboolean
assemble_ABS(r700_AssemblerBase
*pAsm
);
472 GLboolean
assemble_ADD(r700_AssemblerBase
*pAsm
);
/* Takes the opcode name as a string (presumably to report an unsupported opcode -- TODO confirm). */
473 GLboolean
assemble_BAD(char *opcode_str
);
474 GLboolean
assemble_CMP(r700_AssemblerBase
*pAsm
);
475 GLboolean
assemble_COS(r700_AssemblerBase
*pAsm
);
476 GLboolean
assemble_DOT(r700_AssemblerBase
*pAsm
);
477 GLboolean
assemble_DST(r700_AssemblerBase
*pAsm
);
478 GLboolean
assemble_EX2(r700_AssemblerBase
*pAsm
);
479 GLboolean
assemble_FLR(r700_AssemblerBase
*pAsm
);
480 GLboolean
assemble_FLR_INT(r700_AssemblerBase
*pAsm
);
481 GLboolean
assemble_FRC(r700_AssemblerBase
*pAsm
);
482 GLboolean
assemble_KIL(r700_AssemblerBase
*pAsm
);
483 GLboolean
assemble_LG2(r700_AssemblerBase
*pAsm
);
484 GLboolean
assemble_LRP(r700_AssemblerBase
*pAsm
);
485 GLboolean
assemble_MAD(r700_AssemblerBase
*pAsm
);
486 GLboolean
assemble_LIT(r700_AssemblerBase
*pAsm
);
487 GLboolean
assemble_MAX(r700_AssemblerBase
*pAsm
);
488 GLboolean
assemble_MIN(r700_AssemblerBase
*pAsm
);
489 GLboolean
assemble_MOV(r700_AssemblerBase
*pAsm
);
490 GLboolean
assemble_MUL(r700_AssemblerBase
*pAsm
);
491 GLboolean
assemble_POW(r700_AssemblerBase
*pAsm
);
492 GLboolean
assemble_RCP(r700_AssemblerBase
*pAsm
);
493 GLboolean
assemble_RSQ(r700_AssemblerBase
*pAsm
);
494 GLboolean
assemble_SIN(r700_AssemblerBase
*pAsm
);
495 GLboolean
assemble_SCS(r700_AssemblerBase
*pAsm
);
496 GLboolean
assemble_SGE(r700_AssemblerBase
*pAsm
);
497 GLboolean
assemble_SLT(r700_AssemblerBase
*pAsm
);
498 GLboolean
assemble_STP(r700_AssemblerBase
*pAsm
);
499 GLboolean
assemble_TEX(r700_AssemblerBase
*pAsm
);
500 GLboolean
assemble_XPD(r700_AssemblerBase
*pAsm
);
501 GLboolean
assemble_EXPORT(r700_AssemblerBase
*pAsm
);
/* Flow-control opcodes. */
502 GLboolean
assemble_IF(r700_AssemblerBase
*pAsm
);
503 GLboolean
assemble_ENDIF(r700_AssemblerBase
*pAsm
);
/*
 * Export handling and the top-level entry points of the assembler.
 * Only the prototypes are visible here.
 */

/* NOTE(review): the parameters after pAsm and after export_starting_index are not visible in this excerpt. */
505 GLboolean
Process_Export(r700_AssemblerBase
* pAsm
,
507 GLuint export_starting_index
,
509 GLuint starting_register_number
,
510 GLboolean is_depth_export
);
511 GLboolean
Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase
*pAsm
,
512 BITS depth_channel_select
);
/* Takes an instruction count, a Mesa IR instruction pointer (presumably the first of uiNumberInsts entries -- TODO confirm) and the assembler state. */
516 GLboolean
AssembleInstr(GLuint uiNumberInsts
,
517 struct prog_instruction
*pILInst
,
518 r700_AssemblerBase
*pR700AsmCode
);
/* OutputsWritten is a bit mask (GLbitfield) of outputs. */
519 GLboolean
Process_Fragment_Exports(r700_AssemblerBase
*pR700AsmCode
, GLbitfield OutputsWritten
);
520 GLboolean
Process_Vertex_Exports(r700_AssemblerBase
*pR700AsmCode
, GLbitfield OutputsWritten
);
/* Set-up and tear-down of an r700_AssemblerBase; the set-up call receives the shader pipe type and the R700_Shader to work on. */
522 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt
, r700_AssemblerBase
* pAsm
, R700_Shader
* pShader
);
523 GLboolean
Clean_Up_Assembler(r700_AssemblerBase
*pR700AsmCode
);
525 #endif //_R700_ASSEMBLER_H_