3 #include "nv50_ir_target.h"
4 #include "nv50_ir_build_util.h"
6 #include "nv50_ir_from_sm4.h"
8 // XXX: is pass-through implicit? Check ReadWriteMask.
12 static nv50_ir::SVSemantic
irSemantic(unsigned sn
)
15 case TGSI_SEMANTIC_POSITION
: return nv50_ir::SV_POSITION
;
16 case TGSI_SEMANTIC_FACE
: return nv50_ir::SV_FACE
;
17 case NV50_SEMANTIC_LAYER
: return nv50_ir::SV_LAYER
;
18 case NV50_SEMANTIC_VIEWPORTINDEX
: return nv50_ir::SV_VIEWPORT_INDEX
;
19 case TGSI_SEMANTIC_PSIZE
: return nv50_ir::SV_POINT_SIZE
;
20 case NV50_SEMANTIC_CLIPDISTANCE
: return nv50_ir::SV_CLIP_DISTANCE
;
21 case TGSI_SEMANTIC_VERTEXID
: return nv50_ir::SV_VERTEX_ID
;
22 case TGSI_SEMANTIC_INSTANCEID
: return nv50_ir::SV_INSTANCE_ID
;
23 case TGSI_SEMANTIC_PRIMID
: return nv50_ir::SV_PRIMITIVE_ID
;
24 case NV50_SEMANTIC_TESSFACTOR
: return nv50_ir::SV_TESS_FACTOR
;
25 case NV50_SEMANTIC_TESSCOORD
: return nv50_ir::SV_TESS_COORD
;
27 return nv50_ir::SV_UNDEFINED
;
35 using namespace nv50_ir
;
37 #define NV50_IR_MAX_RESOURCES 64
// Converter derives from BuildUtil (see the base-class line below). The
// members visible here hold conversion state and declare the helpers that
// are defined further down in this file.
// NOTE(review): braces, access specifiers and several members are not
// visible in this listing.
39 class Converter
: public BuildUtil
42 Converter(Program
*, struct nv50_ir_prog_info
*);
// Incremented by dcl.num for every DCL_TEMPS seen in inspectDeclaration();
// used as the element count of tData32/tData64 in allocateValues().
48 unsigned int nrRegVals
;
// nrArrays sizes the lData array in allocateValues() and is post-incremented
// per DCL_INDEXABLE_TEMP in handleDeclaration(); arrayVol accumulates
// num * comps * 4 for each of those declarations.
51 unsigned int nrArrays
;
52 unsigned int arrayVol
;
// Per-input interpolation mode, stored from cvtInterpMode() in
// inspectDeclaration() and passed to setInterpolate() in interpolate().
56 uint8_t interpMode
[PIPE_MAX_SHADER_INPUTS
];
58 // outputs for each phase
59 struct nv50_ir_varying out
[3][PIPE_MAX_SHADER_OUTPUTS
];
64 unsigned int phaseStart
;
65 unsigned int phaseInstance
;
// Indexed as phaseInstCnt[0][...] for HS fork phases and phaseInstCnt[1][...]
// for HS join phases in handleDeclaration().
66 unsigned int *phaseInstCnt
[2];
68 bool phaseInstanceUsed
;
69 int phaseEnded
; // (phase + 1) if $phase ended
73 Value
*srcPtr
[3][3]; // for indirect addressing, save pointer values
75 Value
*vtxBase
[3]; // base address of vertex in a primitive (TP/GP)
77 Value
*domainPt
[3]; // pre-fetched TessCoord
// Added to source operand indices when addressing insn->ops (see src(int, int)
// and getSrcPtr()).
79 unsigned int nDstOpnds
;
// Filled by inspectDeclaration(): the sampler's shadow flag, and per resource
// the texture target for [0] SM4_OPCODE_SAMPLE and [1] SM4_OPCODE_SAMPLE_C.
89 bool shadow
[NV50_IR_MAX_RESOURCES
];
90 TexTarget resourceType
[NV50_IR_MAX_RESOURCES
][2];
92 struct nv50_ir_prog_info
& info
;
// Instruction / declaration processing.
100 bool handleInstruction(unsigned int pos
);
101 bool inspectInstruction(unsigned int pos
);
102 bool handleDeclaration(const sm4_dcl
& dcl
);
103 bool inspectDeclaration(const sm4_dcl
& dcl
);
104 bool parseSignature();
106 bool haveNextPhase(unsigned int pos
) const;
108 void allocateValues();
109 void exportOutputs();
// Texture / resource instruction helpers.
111 void emitTex(Value
*dst0
[4], TexInstruction
*, const uint8_t swizzle
[4]);
112 void handleLOAD(Value
*dst0
[4]);
113 void handleSAMPLE(operation
, Value
*dst0
[4]);
114 void handleQUERY(Value
*dst0
[4], enum TexQuery query
);
115 void handleDP(Value
*dst0
[4], int dim
);
// Symbols for shader input / output i, component c.
117 Symbol
*iSym(int i
, int c
);
118 Symbol
*oSym(int i
, int c
);
// Operand access: the (int i, int c) overloads forward to the sm4_op
// overloads using the current instruction's operand list.
120 Value
*src(int i
, int c
);
121 Value
*src(const sm4_op
&, int c
, int i
);
122 Value
*dst(int i
, int c
);
123 Value
*dst(const sm4_op
&, int c
, int i
);
124 void saveDst(int i
, int c
, Value
*value
);
125 void saveDst(const sm4_op
&, int c
, Value
*value
, int i
);
126 void saveFragDepth(operation op
, Value
*value
);
128 Value
*interpolate(const sm4_op
&, int c
, int i
);
// Indirect-address helpers.
130 Value
*getSrcPtr(int s
, int dim
, int shl
);
131 Value
*getDstPtr(int d
, int dim
, int shl
);
132 Value
*getVtxPtr(int s
);
134 bool checkDstSrcAliasing() const;
135 void insertConvergenceOps(BasicBlock
*conv
, BasicBlock
*fork
);
136 void finalizeShader();
// SM4 -> nv50_ir / gallium enum translation tables.
138 operation
cvtOpcode(enum sm4_opcode op
) const;
139 unsigned int getDstOpndCount(enum sm4_opcode opcode
) const;
141 DataType
inferSrcType(enum sm4_opcode op
) const;
142 DataType
inferDstType(enum sm4_opcode op
) const;
144 unsigned g3dPrim(const unsigned prim
, unsigned *patchSize
= NULL
) const;
145 CondCode
cvtCondCode(enum sm4_opcode op
) const;
146 RoundMode
cvtRoundingMode(enum sm4_opcode op
) const;
147 TexTarget
cvtTexTarget(enum sm4_target
,
148 enum sm4_opcode
, operation
*) const;
149 SVSemantic
cvtSemantic(enum sm4_sv
, uint8_t &index
) const;
150 uint8_t cvtInterpMode(enum sm4_interpolation
) const;
152 unsigned tgsiSemantic(SVSemantic
, int index
);
153 void recordSV(unsigned sn
, unsigned si
, unsigned mask
, bool input
);
// The SM4 program being converted (signatures are read from it in
// parseSignature()).
159 const struct sm4_program
& sm4
;
163 #define PRIM_CASE(a, b) \
164 case D3D_PRIMITIVE_TOPOLOGY_##a: return PIPE_PRIM_##b;
167 Converter::g3dPrim(const unsigned prim
, unsigned *patchSize
) const
170 PRIM_CASE(UNDEFINED
, POINTS
);
171 PRIM_CASE(POINTLIST
, POINTS
);
172 PRIM_CASE(LINELIST
, LINES
);
173 PRIM_CASE(LINESTRIP
, LINE_STRIP
);
174 PRIM_CASE(TRIANGLELIST
, TRIANGLES
);
175 PRIM_CASE(TRIANGLESTRIP
, TRIANGLE_STRIP
);
176 PRIM_CASE(LINELIST_ADJ
, LINES_ADJACENCY
);
177 PRIM_CASE(LINESTRIP_ADJ
, LINE_STRIP_ADJACENCY
);
178 PRIM_CASE(TRIANGLELIST_ADJ
, TRIANGLES_ADJACENCY
);
179 PRIM_CASE(TRIANGLESTRIP_ADJ
, TRIANGLES_ADJACENCY
);
181 if (prim
< D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST
||
182 prim
> D3D_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST
)
183 return PIPE_PRIM_POINTS
;
186 prim
- D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST
+ 1;
187 return NV50_PRIM_PATCHES
;
// IPM_CASE(n, a, b): case label for SM4_INTERPOLATION_<n> returning
// NV50_IR_INTERP_<a> | NV50_IR_INTERP_<b>.
191 #define IPM_CASE(n, a, b) \
192 case SM4_INTERPOLATION_##n: return NV50_IR_INTERP_##a | NV50_IR_INTERP_##b
// Translate an SM4 interpolation mode into NV50_IR_INTERP_* flag bits.
// Modes not listed below hit the assert.
// NOTE(review): the switch header and the value returned after the assert
// are not visible in this listing.
195 Converter::cvtInterpMode(enum sm4_interpolation mode
) const
198 IPM_CASE(CONSTANT
, FLAT
, FLAT
);
199 IPM_CASE(LINEAR
, PERSPECTIVE
, PERSPECTIVE
);
200 IPM_CASE(LINEAR_CENTROID
, PERSPECTIVE
, CENTROID
);
201 IPM_CASE(LINEAR_NOPERSPECTIVE
, LINEAR
, LINEAR
);
202 IPM_CASE(LINEAR_NOPERSPECTIVE_CENTROID
, LINEAR
, CENTROID
);
// The *_SAMPLE modes are translated with the OFFSET flag.
203 IPM_CASE(LINEAR_SAMPLE
, PERSPECTIVE
, OFFSET
);
204 IPM_CASE(LINEAR_NOPERSPECTIVE_SAMPLE
, LINEAR
, OFFSET
);
205 IPM_CASE(UNDEFINED
, LINEAR
, LINEAR
);
207 assert(!"invalid interpolation mode");
// Apply an NV50_IR_INTERP_* mode byte to a varying: branches on the bits
// selected by NV50_IR_INTERP_MODE_MASK (LINEAR and FLAT cases are visible)
// and separately tests the NV50_IR_INTERP_CENTROID bit.
// NOTE(review): the statements executed in each branch are not visible in
// this listing; presumably they set the varying's linear/flat/centroid
// fields — confirm against the full source.
213 setVaryingInterpMode(struct nv50_ir_varying
*var
, uint8_t mode
)
215 switch (mode
& NV50_IR_INTERP_MODE_MASK
) {
216 case NV50_IR_INTERP_LINEAR
:
219 case NV50_IR_INTERP_FLAT
:
225 if (mode
& NV50_IR_INTERP_CENTROID
)
// Translate an SM4 ROUND_* opcode into the nv50_ir RoundMode:
// ROUND_NE -> ROUND_NI, ROUND_NI -> ROUND_MI, ROUND_PI -> ROUND_PI,
// ROUND_Z -> ROUND_ZI.
// NOTE(review): the result for other opcodes is not visible in this listing.
230 Converter::cvtRoundingMode(enum sm4_opcode op
) const
233 case SM4_OPCODE_ROUND_NE
: return ROUND_NI
;
234 case SM4_OPCODE_ROUND_NI
: return ROUND_MI
;
235 case SM4_OPCODE_ROUND_PI
: return ROUND_PI
;
236 case SM4_OPCODE_ROUND_Z
: return ROUND_ZI
;
// Translate an SM4 comparison opcode into the nv50_ir condition code.
// NOTE(review): only the last label of each case group is visible in this
// listing (the gaps in line numbering suggest further labels fall through to
// these returns); the fallback for other opcodes is not visible either.
243 Converter::cvtCondCode(enum sm4_opcode op
) const
248 case SM4_OPCODE_IEQ
: return CC_EQ
;
252 case SM4_OPCODE_UGE
: return CC_GE
;
256 case SM4_OPCODE_ULT
: return CC_LT
;
259 case SM4_OPCODE_DNE
: return CC_NEU
;
// Derive the source operand DataType from an SM4 opcode.
// NOTE(review): only case labels are visible in this listing; the return
// statement of each group and the default are missing, so the group notes
// below are assumptions based on the opcode prefixes — TODO confirm.
266 Converter::inferSrcType(enum sm4_opcode op
) const
// I* opcodes — presumably the signed integer group.
269 case SM4_OPCODE_IADD
:
273 case SM4_OPCODE_IMAD
:
274 case SM4_OPCODE_IMAX
:
275 case SM4_OPCODE_IMIN
:
276 case SM4_OPCODE_IMUL
:
278 case SM4_OPCODE_INEG
:
279 case SM4_OPCODE_ISHL
:
280 case SM4_OPCODE_ISHR
:
281 case SM4_OPCODE_ITOF
:
282 case SM4_OPCODE_ATOMIC_IADD
:
283 case SM4_OPCODE_ATOMIC_IMAX
:
284 case SM4_OPCODE_ATOMIC_IMIN
:
// U* and bitwise atomic opcodes — presumably the unsigned integer group.
289 case SM4_OPCODE_UDIV
:
292 case SM4_OPCODE_UMUL
:
293 case SM4_OPCODE_UMAD
:
294 case SM4_OPCODE_UMAX
:
295 case SM4_OPCODE_UMIN
:
296 case SM4_OPCODE_USHR
:
297 case SM4_OPCODE_UTOF
:
299 case SM4_OPCODE_UADDC
:
300 case SM4_OPCODE_USUBB
:
301 case SM4_OPCODE_ATOMIC_AND
:
302 case SM4_OPCODE_ATOMIC_OR
:
303 case SM4_OPCODE_ATOMIC_XOR
:
304 case SM4_OPCODE_ATOMIC_UMAX
:
305 case SM4_OPCODE_ATOMIC_UMIN
:
// D* opcodes — presumably the double-precision group.
307 case SM4_OPCODE_DADD
:
308 case SM4_OPCODE_DMAX
:
309 case SM4_OPCODE_DMIN
:
310 case SM4_OPCODE_DMUL
:
315 case SM4_OPCODE_DMOV
:
316 case SM4_OPCODE_DMOVC
:
317 case SM4_OPCODE_DTOF
:
// Half-to-float conversion — presumably a 16-bit float source.
319 case SM4_OPCODE_F16TOF32
:
// Derive the destination DataType from an SM4 opcode. The conversion
// opcodes listed below are special-cased; every other opcode uses the same
// type as its sources via inferSrcType(op).
// NOTE(review): the types returned for the special cases are not visible in
// this listing.
327 Converter::inferDstType(enum sm4_opcode op
) const
330 case SM4_OPCODE_FTOI
:
332 case SM4_OPCODE_FTOU
:
338 case SM4_OPCODE_FTOD
:
340 case SM4_OPCODE_F32TOF16
:
342 case SM4_OPCODE_ITOF
:
343 case SM4_OPCODE_UTOF
:
344 case SM4_OPCODE_DTOF
:
347 return inferSrcType(op
);
// Translate an SM4 opcode into the nv50_ir operation used to implement it.
// Several SM4 opcodes share one operation (for example all comparison
// opcodes return OP_SET and all conversions return OP_CVT).
// NOTE(review): the switch header and the result for opcodes not listed
// here are not visible in this listing.
352 Converter::cvtOpcode(enum sm4_opcode op
) const
355 case SM4_OPCODE_ADD
: return OP_ADD
;
356 case SM4_OPCODE_AND
: return OP_AND
;
357 case SM4_OPCODE_BREAK
: return OP_BREAK
;
358 case SM4_OPCODE_BREAKC
: return OP_BREAK
;
359 case SM4_OPCODE_CALL
: return OP_CALL
;
360 case SM4_OPCODE_CALLC
: return OP_CALL
;
361 case SM4_OPCODE_CASE
: return OP_NOP
;
362 case SM4_OPCODE_CONTINUE
: return OP_CONT
;
363 case SM4_OPCODE_CONTINUEC
: return OP_CONT
;
364 case SM4_OPCODE_CUT
: return OP_RESTART
;
365 case SM4_OPCODE_DEFAULT
: return OP_NOP
;
366 case SM4_OPCODE_DERIV_RTX
: return OP_DFDX
;
367 case SM4_OPCODE_DERIV_RTY
: return OP_DFDY
;
368 case SM4_OPCODE_DISCARD
: return OP_DISCARD
;
369 case SM4_OPCODE_DIV
: return OP_DIV
;
// All dot products map to OP_MAD.
370 case SM4_OPCODE_DP2
: return OP_MAD
;
371 case SM4_OPCODE_DP3
: return OP_MAD
;
372 case SM4_OPCODE_DP4
: return OP_MAD
;
373 case SM4_OPCODE_ELSE
: return OP_BRA
;
374 case SM4_OPCODE_EMIT
: return OP_EMIT
;
375 case SM4_OPCODE_EMITTHENCUT
: return OP_EMIT
;
376 case SM4_OPCODE_ENDIF
: return OP_BRA
;
377 case SM4_OPCODE_ENDLOOP
: return OP_PREBREAK
;
378 case SM4_OPCODE_ENDSWITCH
: return OP_NOP
;
379 case SM4_OPCODE_EQ
: return OP_SET
;
380 case SM4_OPCODE_EXP
: return OP_EX2
;
381 case SM4_OPCODE_FRC
: return OP_CVT
;
382 case SM4_OPCODE_FTOI
: return OP_CVT
;
383 case SM4_OPCODE_FTOU
: return OP_CVT
;
384 case SM4_OPCODE_GE
: return OP_SET
;
385 case SM4_OPCODE_IADD
: return OP_ADD
;
386 case SM4_OPCODE_IF
: return OP_BRA
;
387 case SM4_OPCODE_IEQ
: return OP_SET
;
388 case SM4_OPCODE_IGE
: return OP_SET
;
389 case SM4_OPCODE_ILT
: return OP_SET
;
390 case SM4_OPCODE_IMAD
: return OP_MAD
;
391 case SM4_OPCODE_IMAX
: return OP_MAX
;
392 case SM4_OPCODE_IMIN
: return OP_MIN
;
393 case SM4_OPCODE_IMUL
: return OP_MUL
;
394 case SM4_OPCODE_INE
: return OP_SET
;
395 case SM4_OPCODE_INEG
: return OP_NEG
;
396 case SM4_OPCODE_ISHL
: return OP_SHL
;
397 case SM4_OPCODE_ISHR
: return OP_SHR
;
398 case SM4_OPCODE_ITOF
: return OP_CVT
;
399 case SM4_OPCODE_LD
: return OP_TXF
;
400 case SM4_OPCODE_LD_MS
: return OP_TXF
;
401 case SM4_OPCODE_LOG
: return OP_LG2
;
402 case SM4_OPCODE_LOOP
: return OP_PRECONT
;
403 case SM4_OPCODE_LT
: return OP_SET
;
404 case SM4_OPCODE_MAD
: return OP_MAD
;
405 case SM4_OPCODE_MIN
: return OP_MIN
;
406 case SM4_OPCODE_MAX
: return OP_MAX
;
407 case SM4_OPCODE_MOV
: return OP_MOV
;
408 case SM4_OPCODE_MOVC
: return OP_MOV
;
409 case SM4_OPCODE_MUL
: return OP_MUL
;
410 case SM4_OPCODE_NE
: return OP_SET
;
411 case SM4_OPCODE_NOP
: return OP_NOP
;
412 case SM4_OPCODE_NOT
: return OP_NOT
;
413 case SM4_OPCODE_OR
: return OP_OR
;
414 case SM4_OPCODE_RESINFO
: return OP_TXQ
;
415 case SM4_OPCODE_RET
: return OP_RET
;
416 case SM4_OPCODE_RETC
: return OP_RET
;
// ROUND_NE is a conversion; the other rounding opcodes have dedicated
// operations.
417 case SM4_OPCODE_ROUND_NE
: return OP_CVT
;
418 case SM4_OPCODE_ROUND_NI
: return OP_FLOOR
;
419 case SM4_OPCODE_ROUND_PI
: return OP_CEIL
;
420 case SM4_OPCODE_ROUND_Z
: return OP_TRUNC
;
421 case SM4_OPCODE_RSQ
: return OP_RSQ
;
422 case SM4_OPCODE_SAMPLE
: return OP_TEX
;
423 case SM4_OPCODE_SAMPLE_C
: return OP_TEX
;
424 case SM4_OPCODE_SAMPLE_C_LZ
: return OP_TEX
;
425 case SM4_OPCODE_SAMPLE_L
: return OP_TXL
;
426 case SM4_OPCODE_SAMPLE_D
: return OP_TXD
;
427 case SM4_OPCODE_SAMPLE_B
: return OP_TXB
;
428 case SM4_OPCODE_SQRT
: return OP_SQRT
;
429 case SM4_OPCODE_SWITCH
: return OP_NOP
;
430 case SM4_OPCODE_SINCOS
: return OP_PRESIN
;
431 case SM4_OPCODE_UDIV
: return OP_DIV
;
432 case SM4_OPCODE_ULT
: return OP_SET
;
433 case SM4_OPCODE_UGE
: return OP_SET
;
434 case SM4_OPCODE_UMUL
: return OP_MUL
;
435 case SM4_OPCODE_UMAD
: return OP_MAD
;
436 case SM4_OPCODE_UMAX
: return OP_MAX
;
437 case SM4_OPCODE_UMIN
: return OP_MIN
;
438 case SM4_OPCODE_USHR
: return OP_SHR
;
439 case SM4_OPCODE_UTOF
: return OP_CVT
;
440 case SM4_OPCODE_XOR
: return OP_XOR
;
442 case SM4_OPCODE_GATHER4
: return OP_TXG
;
443 case SM4_OPCODE_SAMPLE_POS
: return OP_PIXLD
;
444 case SM4_OPCODE_SAMPLE_INFO
: return OP_PIXLD
;
445 case SM4_OPCODE_EMIT_STREAM
: return OP_EMIT
;
446 case SM4_OPCODE_CUT_STREAM
: return OP_RESTART
;
447 case SM4_OPCODE_EMITTHENCUT_STREAM
: return OP_EMIT
;
448 case SM4_OPCODE_INTERFACE_CALL
: return OP_CALL
;
449 case SM4_OPCODE_BUFINFO
: return OP_TXQ
;
// Coarse and fine derivative variants map to the same operation.
450 case SM4_OPCODE_DERIV_RTX_COARSE
: return OP_DFDX
;
451 case SM4_OPCODE_DERIV_RTX_FINE
: return OP_DFDX
;
452 case SM4_OPCODE_DERIV_RTY_COARSE
: return OP_DFDY
;
453 case SM4_OPCODE_DERIV_RTY_FINE
: return OP_DFDY
;
454 case SM4_OPCODE_GATHER4_C
: return OP_TXG
;
455 case SM4_OPCODE_GATHER4_PO
: return OP_TXG
;
456 case SM4_OPCODE_GATHER4_PO_C
: return OP_TXG
;
458 case SM4_OPCODE_RCP
: return OP_RCP
;
459 case SM4_OPCODE_F32TOF16
: return OP_CVT
;
460 case SM4_OPCODE_F16TOF32
: return OP_CVT
;
461 case SM4_OPCODE_UADDC
: return OP_ADD
;
462 case SM4_OPCODE_USUBB
: return OP_SUB
;
463 case SM4_OPCODE_COUNTBITS
: return OP_POPCNT
;
// Atomics return the plain arithmetic/logic operation they perform.
465 case SM4_OPCODE_ATOMIC_AND
: return OP_AND
;
466 case SM4_OPCODE_ATOMIC_OR
: return OP_OR
;
467 case SM4_OPCODE_ATOMIC_XOR
: return OP_XOR
;
468 case SM4_OPCODE_ATOMIC_CMP_STORE
: return OP_STORE
;
469 case SM4_OPCODE_ATOMIC_IADD
: return OP_ADD
;
470 case SM4_OPCODE_ATOMIC_IMAX
: return OP_MAX
;
471 case SM4_OPCODE_ATOMIC_IMIN
: return OP_MIN
;
472 case SM4_OPCODE_ATOMIC_UMAX
: return OP_MAX
;
473 case SM4_OPCODE_ATOMIC_UMIN
: return OP_MIN
;
475 case SM4_OPCODE_SYNC
: return OP_MEMBAR
;
// Double-precision opcodes reuse the generic operations.
476 case SM4_OPCODE_DADD
: return OP_ADD
;
477 case SM4_OPCODE_DMAX
: return OP_MAX
;
478 case SM4_OPCODE_DMIN
: return OP_MIN
;
479 case SM4_OPCODE_DMUL
: return OP_MUL
;
480 case SM4_OPCODE_DEQ
: return OP_SET
;
481 case SM4_OPCODE_DGE
: return OP_SET
;
482 case SM4_OPCODE_DLT
: return OP_SET
;
483 case SM4_OPCODE_DNE
: return OP_SET
;
484 case SM4_OPCODE_DMOV
: return OP_MOV
;
485 case SM4_OPCODE_DMOVC
: return OP_MOV
;
486 case SM4_OPCODE_DTOF
: return OP_CVT
;
487 case SM4_OPCODE_FTOD
: return OP_CVT
;
// Return the number of destination operands an SM4 opcode has.
// NOTE(review): only case labels are visible in this listing; the counts
// returned for each group and for unlisted opcodes are missing. Presumably
// the first group (SINCOS/UDIV/IMUL/UMUL) has two destinations and the
// control-flow / phase group has none — TODO confirm.
495 Converter::getDstOpndCount(enum sm4_opcode opcode
) const
498 case SM4_OPCODE_SINCOS
:
499 case SM4_OPCODE_UDIV
:
500 case SM4_OPCODE_IMUL
:
501 case SM4_OPCODE_UMUL
:
503 case SM4_OPCODE_BREAK
:
504 case SM4_OPCODE_BREAKC
:
505 case SM4_OPCODE_CALL
:
506 case SM4_OPCODE_CALLC
:
507 case SM4_OPCODE_CONTINUE
:
508 case SM4_OPCODE_CONTINUEC
:
509 case SM4_OPCODE_DISCARD
:
510 case SM4_OPCODE_EMIT
:
511 case SM4_OPCODE_EMIT_STREAM
:
513 case SM4_OPCODE_CUT_STREAM
:
514 case SM4_OPCODE_EMITTHENCUT
:
515 case SM4_OPCODE_EMITTHENCUT_STREAM
:
517 case SM4_OPCODE_ELSE
:
518 case SM4_OPCODE_ENDIF
:
519 case SM4_OPCODE_LOOP
:
520 case SM4_OPCODE_ENDLOOP
:
522 case SM4_OPCODE_RETC
:
523 case SM4_OPCODE_SYNC
:
524 case SM4_OPCODE_SWITCH
:
525 case SM4_OPCODE_CASE
:
526 case SM4_OPCODE_HS_DECLS
:
527 case SM4_OPCODE_HS_CONTROL_POINT_PHASE
:
528 case SM4_OPCODE_HS_FORK_PHASE
:
529 case SM4_OPCODE_HS_JOIN_PHASE
:
// TARG_CASE_1(a, b): SM4_TARGET_<a> always maps to TEX_TARGET_<b>.
// TARG_CASE_2(a, b): SM4_TARGET_<a> maps to TEX_TARGET_<b>_SHADOW when the
// local `dc` flag is set, TEX_TARGET_<b> otherwise.
536 #define TARG_CASE_1(a, b) case SM4_TARGET_##a: return TEX_TARGET_##b;
537 #define TARG_CASE_2(a, b) case SM4_TARGET_##a: \
538 return dc ? TEX_TARGET_##b##_SHADOW : TEX_TARGET_##b
// Translate an SM4 resource target into an nv50_ir TexTarget.
//
// targ: SM4 resource target.
// op:   SM4 opcode accessing the resource; the *_C sample/gather opcodes
//       select the shadow variant of targets that have one.
// opr:  written with OP_LOAD / OP_SULD for raw / structured buffers.
// NOTE(review): inspectDeclaration() passes NULL for opr; the guard around
// the stores through opr is not visible in this listing — confirm.
541 Converter::cvtTexTarget(enum sm4_target targ
,
542 enum sm4_opcode op
, operation
*opr
) const
// dc: true for the depth-compare opcodes.
544 bool dc
= (op
== SM4_OPCODE_SAMPLE_C
||
545 op
== SM4_OPCODE_SAMPLE_C_LZ
||
546 op
== SM4_OPCODE_GATHER4_C
||
547 op
== SM4_OPCODE_GATHER4_PO_C
);
551 case SM4_TARGET_RAW_BUFFER
: *opr
= OP_LOAD
; break;
552 case SM4_TARGET_STRUCTURED_BUFFER
: *opr
= OP_SULD
; break;
560 TARG_CASE_1(UNKNOWN
, 2D
);
561 TARG_CASE_2(TEXTURE1D
, 1D
);
562 TARG_CASE_2(TEXTURE2D
, 2D
);
563 TARG_CASE_1(TEXTURE2DMS
, 2D_MS
);
564 TARG_CASE_1(TEXTURE3D
, 3D
);
565 TARG_CASE_2(TEXTURECUBE
, CUBE
);
566 TARG_CASE_2(TEXTURE1DARRAY
, 1D_ARRAY
);
567 TARG_CASE_2(TEXTURE2DARRAY
, 2D_ARRAY
);
568 TARG_CASE_1(TEXTURE2DMSARRAY
, 2D_MS_ARRAY
);
569 TARG_CASE_2(TEXTURECUBEARRAY
, CUBE_ARRAY
);
570 TARG_CASE_1(BUFFER
, BUFFER
);
571 TARG_CASE_1(RAW_BUFFER
, BUFFER
);
572 TARG_CASE_1(STRUCTURED_BUFFER
, BUFFER
);
// Unknown targets assert, then fall back to a 2D (shadow) target.
574 assert(!"invalid SM4 texture target");
575 return dc
? TEX_TARGET_2D_SHADOW
: TEX_TARGET_2D
;
// Return the index of a final tessellation factor system value: edge
// factors return 0..3 (quad) or 0..2 (tri), inside factors return 4 and 5
// (quad) or 4 (tri); line detail returns 0 and line density returns 4.
// NOTE(review): the result for other system values is not visible in this
// listing.
579 static inline uint32_t
580 getSVIndex(enum sm4_sv sv
)
583 case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR
: return 0;
584 case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR
: return 1;
585 case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR
: return 2;
586 case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR
: return 3;
588 case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR
: return 4;
589 case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR
: return 5;
591 case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR
: return 0;
592 case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR
: return 1;
593 case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR
: return 2;
595 case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR
: return 4;
597 case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR
: return 0;
599 case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR
: return 4;
// Translate an SM4 system value into the nv50_ir SVSemantic.
//
// sv:  SM4 system value.
// idx: for the FINAL_*_TESSFACTOR values, set to getSVIndex(sv).
// Unlisted system values hit the assert.
// NOTE(review): any initialisation of idx for the other cases and the value
// returned after the assert are not visible in this listing.
607 Converter::cvtSemantic(enum sm4_sv sv
, uint8_t &idx
) const
612 case SM4_SV_UNDEFINED
: return SV_UNDEFINED
;
613 case SM4_SV_POSITION
: return SV_POSITION
;
614 case SM4_SV_CLIP_DISTANCE
: return SV_CLIP_DISTANCE
;
615 case SM4_SV_CULL_DISTANCE
: return SV_CLIP_DISTANCE
; // XXX: distinction
616 case SM4_SV_RENDER_TARGET_ARRAY_INDEX
: return SV_LAYER
;
617 case SM4_SV_VIEWPORT_ARRAY_INDEX
: return SV_VIEWPORT_INDEX
;
618 case SM4_SV_VERTEX_ID
: return SV_VERTEX_ID
;
619 case SM4_SV_PRIMITIVE_ID
: return SV_PRIMITIVE_ID
;
620 case SM4_SV_INSTANCE_ID
: return SV_INSTANCE_ID
;
621 case SM4_SV_IS_FRONT_FACE
: return SV_FACE
;
622 case SM4_SV_SAMPLE_INDEX
: return SV_SAMPLE_INDEX
;
// All final tessellation factors share SV_TESS_FACTOR and are told apart by
// idx.
624 case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR
:
625 case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR
:
626 case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR
:
627 case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR
:
628 case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR
:
629 case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR
:
630 case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR
:
631 case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR
:
632 case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR
:
633 case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR
:
634 case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR
:
635 case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR
:
636 idx
= getSVIndex(sv
);
637 return SV_TESS_FACTOR
;
640 assert(!"invalid SM4 system value");
646 Converter::tgsiSemantic(SVSemantic sv
, int index
)
649 case SV_POSITION
: return TGSI_SEMANTIC_POSITION
;
650 case SV_FACE
: return TGSI_SEMANTIC_FACE
;
651 case SV_LAYER
: return NV50_SEMANTIC_LAYER
;
652 case SV_VIEWPORT_INDEX
: return NV50_SEMANTIC_VIEWPORTINDEX
;
653 case SV_POINT_SIZE
: return TGSI_SEMANTIC_PSIZE
;
654 case SV_CLIP_DISTANCE
: return NV50_SEMANTIC_CLIPDISTANCE
;
655 case SV_VERTEX_ID
: return TGSI_SEMANTIC_VERTEXID
;
656 case SV_INSTANCE_ID
: return TGSI_SEMANTIC_INSTANCEID
;
657 case SV_PRIMITIVE_ID
: return TGSI_SEMANTIC_PRIMID
;
658 case SV_TESS_FACTOR
: return NV50_SEMANTIC_TESSFACTOR
;
659 case SV_TESS_COORD
: return NV50_SEMANTIC_TESSCOORD
;
660 case SV_INVOCATION_ID
: return NV50_SEMANTIC_INVOCATIONID
;
662 return TGSI_SEMANTIC_GENERIC
;
// Record a system value in info.sv[].
//
// sn, si: semantic name / index of the system value.
// mask:   component mask stored in the entry.
// input:  stored as 1 (true) or 0 (false) in the entry's `input` field.
//
// The loop scans the existing info.numSysVals entries for one whose sn
// matches; the entry at index i then has its mask and input fields written,
// and info.numSysVals is set to i + 1.
// NOTE(review): the rest of the match condition, the loop exit and the
// guard around the numSysVals update are not visible in this listing.
667 Converter::recordSV(unsigned sn
, unsigned si
, unsigned mask
, bool input
)
670 for (i
= 0; i
< info
.numSysVals
; ++i
)
671 if (info
.sv
[i
].sn
== sn
&&
674 info
.numSysVals
= i
+ 1;
677 info
.sv
[i
].mask
= mask
;
678 info
.sv
[i
].input
= input
? 1 : 0;
// Fill info.in[], info.out[] and the patch-constant varyings from the SM4
// signatures (sm4.params_in, sm4.params_out, sm4.params_patch): component
// masks, the regular/patch flags, semantic names/indices and the
// numInputs / numOutputs / numPatchConstants counts.
// NOTE(review): braces, `break`/`continue` lines, `else` keywords, some
// assignments and the return statement are not visible in this listing.
682 Converter::parseSignature()
684 struct nv50_ir_varying
*patch
;
685 unsigned int i
, r
, n
;
689 info
.numPatchConstants
= 0;
// Inputs: accumulate the component mask per register and derive the semantic
// name from SystemValueType.
691 for (n
= 0, i
= 0; i
< sm4
.num_params_in
; ++i
) {
692 r
= sm4
.params_in
[i
].Register
;
694 info
.in
[r
].mask
|= sm4
.params_in
[i
].ReadWriteMask
;
695 // mask might be uninitialized ...
696 if (!sm4
.params_in
[i
].ReadWriteMask
)
697 info
.in
[r
].mask
= 0xf;
699 if (info
.in
[r
].regular
) // already assigned semantic name/index
701 info
.in
[r
].regular
= 1;
702 info
.in
[r
].patch
= 0;
704 info
.numInputs
= MAX2(info
.numInputs
, r
+ 1);
706 switch (sm4
.params_in
[i
].SystemValueType
) {
707 case D3D_NAME_UNDEFINED
:
708 info
.in
[r
].sn
= TGSI_SEMANTIC_GENERIC
;
711 case D3D_NAME_POSITION
:
712 info
.in
[r
].sn
= TGSI_SEMANTIC_POSITION
;
714 case D3D_NAME_VERTEX_ID
:
715 info
.in
[r
].sn
= TGSI_SEMANTIC_VERTEXID
;
717 case D3D_NAME_PRIMITIVE_ID
:
718 info
.in
[r
].sn
= TGSI_SEMANTIC_PRIMID
;
719 // no corresponding output
720 recordSV(TGSI_SEMANTIC_PRIMID
, 0, 1, true);
722 case D3D_NAME_INSTANCE_ID
:
723 info
.in
[r
].sn
= TGSI_SEMANTIC_INSTANCEID
;
725 case D3D_NAME_IS_FRONT_FACE
:
726 info
.in
[r
].sn
= TGSI_SEMANTIC_FACE
;
727 // no corresponding output
728 recordSV(TGSI_SEMANTIC_FACE
, 0, 1, true);
731 assert(!"invalid/unsupported input linkage semantic");
// Outputs: note that the mask is OR-ed with the complement of ReadWriteMask
// here, unlike the input loop above.
736 for (n
= 0, i
= 0; i
< sm4
.num_params_out
; ++i
) {
737 r
= sm4
.params_out
[i
].Register
;
739 info
.out
[r
].mask
|= ~sm4
.params_out
[i
].ReadWriteMask
;
741 if (info
.out
[r
].regular
) // already assigned semantic name/index
743 info
.out
[r
].regular
= 1;
744 info
.out
[r
].patch
= 0;
746 info
.numOutputs
= MAX2(info
.numOutputs
, r
+ 1);
748 switch (sm4
.params_out
[i
].SystemValueType
) {
// Outputs without a system value are colour results in fragment programs;
// the GENERIC assignment below presumably belongs to the else branch (the
// `else` line is not visible) — TODO confirm.
749 case D3D_NAME_UNDEFINED
:
750 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
751 info
.out
[r
].sn
= TGSI_SEMANTIC_COLOR
;
752 info
.out
[r
].si
= info
.prop
.fp
.numColourResults
++;
754 info
.out
[r
].sn
= TGSI_SEMANTIC_GENERIC
;
755 info
.out
[r
].si
= n
++;
758 case D3D_NAME_POSITION
:
760 case D3D_NAME_DEPTH_GREATER_EQUAL
:
761 case D3D_NAME_DEPTH_LESS_EQUAL
:
762 info
.out
[r
].sn
= TGSI_SEMANTIC_POSITION
;
763 info
.io
.fragDepth
= r
;
765 case D3D_NAME_CULL_DISTANCE
:
766 case D3D_NAME_CLIP_DISTANCE
:
767 info
.out
[r
].sn
= NV50_SEMANTIC_CLIPDISTANCE
;
768 info
.out
[r
].si
= sm4
.params_out
[i
].SemanticIndex
;
770 case D3D_NAME_RENDER_TARGET_ARRAY_INDEX
:
771 info
.out
[r
].sn
= NV50_SEMANTIC_LAYER
;
773 case D3D_NAME_VIEWPORT_ARRAY_INDEX
:
774 info
.out
[r
].sn
= NV50_SEMANTIC_VIEWPORTINDEX
;
776 case D3D_NAME_PRIMITIVE_ID
:
777 info
.out
[r
].sn
= TGSI_SEMANTIC_PRIMID
;
779 case D3D_NAME_TARGET
:
780 info
.out
[r
].sn
= TGSI_SEMANTIC_COLOR
;
781 info
.out
[r
].si
= sm4
.params_out
[i
].SemanticIndex
;
783 case D3D_NAME_COVERAGE
:
784 info
.out
[r
].sn
= NV50_SEMANTIC_SAMPLEMASK
;
785 info
.io
.sampleMask
= r
;
787 case D3D_NAME_SAMPLE_INDEX
:
789 assert(!"invalid/unsupported output linkage semantic");
// Patch constants live after the regular inputs for TESSELLATION_EVAL
// programs; the second assignment presumably is the else branch (after the
// regular outputs) — the `else` line is not visible, TODO confirm.
794 if (prog
->getType() == Program::TYPE_TESSELLATION_EVAL
)
795 patch
= &info
.in
[info
.numInputs
];
797 patch
= &info
.out
[info
.numOutputs
];
799 for (n
= 0, i
= 0; i
< sm4
.num_params_patch
; ++i
) {
800 r
= sm4
.params_patch
[i
].Register
;
802 patch
[r
].mask
|= sm4
.params_patch
[i
].Mask
;
804 if (patch
[r
].regular
) // already visited
806 patch
[r
].regular
= 1;
809 info
.numPatchConstants
= MAX2(info
.numPatchConstants
, r
+ 1);
811 switch (sm4
.params_patch
[i
].SystemValueType
) {
812 case D3D_NAME_UNDEFINED
:
813 patch
[r
].sn
= TGSI_SEMANTIC_GENERIC
;
// Edge / detail factors keep their SemanticIndex ...
816 case D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR
:
817 case D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR
:
818 case D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR
:
819 patch
[r
].sn
= NV50_SEMANTIC_TESSFACTOR
;
820 patch
[r
].si
= sm4
.params_patch
[i
].SemanticIndex
;
// ... while inside / density factors are offset by 4.
822 case D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR
:
823 case D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR
:
824 case D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR
:
825 patch
[r
].sn
= NV50_SEMANTIC_TESSFACTOR
;
826 patch
[r
].si
= sm4
.params_patch
[i
].SemanticIndex
+ 4;
829 assert(!"invalid patch-constant linkage semantic");
// Account for the patch constants in the input count (TESSELLATION_EVAL) or,
// presumably in the missing else branch, in the output count.
833 if (prog
->getType() == Program::TYPE_TESSELLATION_EVAL
)
834 info
.numInputs
+= info
.numPatchConstants
;
836 info
.numOutputs
+= info
.numPatchConstants
;
// Inspection step for one SM4 declaration: records what the declaration
// implies in the converter state (shadow[], resourceType[], interpMode[],
// nrRegVals) and in `info` (immediate buffer, I/O semantics, GS/TP/FP
// properties).
// NOTE(review): braces, `break` lines, several statements and the return
// value are not visible in this listing.
842 Converter::inspectDeclaration(const sm4_dcl
& dcl
)
845 enum sm4_interpolation ipa_mode
;
// idx: displacement of the declaration operand's first index, when the
// operand exists and that index is simple.
847 if (dcl
.op
.get() && dcl
.op
->is_index_simple(0))
848 idx
= dcl
.op
->indices
[0].disp
;
850 switch (dcl
.opcode
) {
851 case SM4_OPCODE_DCL_SAMPLER
:
853 shadow
[idx
] = dcl
.dcl_sampler
.shadow
;
855 case SM4_OPCODE_DCL_RESOURCE
:
857 enum sm4_target targ
= (enum sm4_target
)dcl
.dcl_resource
.target
;
859 assert(idx
>= 0 && idx
< NV50_IR_MAX_RESOURCES
);
// Cache both the plain and the depth-compare texture target.
860 resourceType
[idx
][0] = cvtTexTarget(targ
, SM4_OPCODE_SAMPLE
, NULL
);
861 resourceType
[idx
][1] = cvtTexTarget(targ
, SM4_OPCODE_SAMPLE_C
, NULL
);
864 case SM4_OPCODE_DCL_CONSTANT_BUFFER
:
// Custom data is copied into a freshly allocated immediate buffer of
// dcl.num * 4 bytes.
// NOTE(review): no check of the MALLOC result is visible here — confirm.
867 case SM4_OPCODE_CUSTOMDATA
:
868 info
.immd
.bufSize
= dcl
.num
* 4;
869 info
.immd
.buf
= (uint32_t *)MALLOC(info
.immd
.bufSize
);
870 memcpy(info
.immd
.buf
, dcl
.data
, info
.immd
.bufSize
);
872 case SM4_OPCODE_DCL_INDEX_RANGE
:
// Pixel shader inputs: remember and apply the interpolation mode.
875 case SM4_OPCODE_DCL_INPUT_PS_SGV
:
876 case SM4_OPCODE_DCL_INPUT_PS_SIV
:
877 case SM4_OPCODE_DCL_INPUT_PS
:
879 assert(idx
>= 0 && idx
< info
.numInputs
);
880 ipa_mode
= (enum sm4_interpolation
)dcl
.dcl_input_ps
.interpolation
;
881 interpMode
[idx
] = cvtInterpMode(ipa_mode
);
882 setVaryingInterpMode(&info
.in
[idx
], interpMode
[idx
]);
// The domain point gets a new input slot with the TESSCOORD semantic.
885 case SM4_OPCODE_DCL_INPUT_SGV
:
886 case SM4_OPCODE_DCL_INPUT_SIV
:
887 case SM4_OPCODE_DCL_INPUT
:
888 if (dcl
.op
->file
== SM4_FILE_INPUT_DOMAIN_POINT
) {
889 idx
= info
.numInputs
++;
890 info
.in
[idx
].sn
= NV50_SEMANTIC_TESSCOORD
;
891 info
.in
[idx
].mask
= dcl
.op
->mask
;
893 // rest handled in parseSignature
895 case SM4_OPCODE_DCL_OUTPUT_SGV
:
896 case SM4_OPCODE_DCL_OUTPUT_SIV
:
898 case SM4_SV_POSITION
:
899 assert(prog
->getType() != Program::TYPE_FRAGMENT
);
// clipDistanceMask is used as a running count while inspecting; the cull
// case sets the bit at the current count in cullDistanceMask.
901 case SM4_SV_CULL_DISTANCE
: // XXX: order ?
902 info
.io
.cullDistanceMask
|= 1 << info
.io
.clipDistanceMask
;
904 case SM4_SV_CLIP_DISTANCE
:
905 info
.io
.clipDistanceMask
++; // abuse as count
// Depth and coverage outputs get a new output slot.
// NOTE(review): the statements guarded by the `< 0xff` tests are not visible
// in this listing.
910 switch (dcl
.op
->file
) {
911 case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL
:
912 case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL
:
913 case SM4_FILE_OUTPUT_DEPTH
:
914 if (info
.io
.fragDepth
< 0xff)
916 idx
= info
.io
.fragDepth
= info
.numOutputs
++;
917 info
.out
[idx
].sn
= TGSI_SEMANTIC_POSITION
;
919 case SM4_FILE_OUTPUT_COVERAGE_MASK
:
920 if (info
.io
.sampleMask
< 0xff)
922 idx
= info
.io
.sampleMask
= info
.numOutputs
++;
923 info
.out
[idx
].sn
= NV50_SEMANTIC_SAMPLEMASK
;
929 case SM4_OPCODE_DCL_OUTPUT
:
930 // handled in parseSignature
932 case SM4_OPCODE_DCL_TEMPS
:
933 nrRegVals
+= dcl
.num
;
935 case SM4_OPCODE_DCL_INDEXABLE_TEMP
:
938 case SM4_OPCODE_DCL_GLOBAL_FLAGS
:
939 if (prog
->getType() == Program::TYPE_FRAGMENT
)
940 info
.prop
.fp
.earlyFragTests
= dcl
.dcl_global_flags
.early_depth_stencil
;
943 case SM4_OPCODE_DCL_FUNCTION_BODY
:
945 case SM4_OPCODE_DCL_FUNCTION_TABLE
:
947 case SM4_OPCODE_DCL_INTERFACE
:
// Geometry shader properties.
951 case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY
:
952 info
.prop
.gp
.outputPrim
= g3dPrim(
953 dcl
.dcl_gs_output_primitive_topology
.primitive_topology
);
955 case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE
:
956 info
.prop
.gp
.inputPrim
= g3dPrim(dcl
.dcl_gs_input_primitive
.primitive
);
958 case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT
:
959 info
.prop
.gp
.maxVertices
= dcl
.num
;
961 case SM4_OPCODE_DCL_GS_INSTANCE_COUNT
:
962 info
.prop
.gp
.instanceCount
= dcl
.num
;
964 case SM4_OPCODE_DCL_STREAM
:
// Tessellation properties.
968 case SM4_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT
:
969 info
.prop
.tp
.inputPatchSize
=
970 dcl
.dcl_input_control_point_count
.control_points
;
972 case SM4_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT
:
973 info
.prop
.tp
.outputPatchSize
=
974 dcl
.dcl_output_control_point_count
.control_points
;
976 case SM4_OPCODE_DCL_TESS_DOMAIN
:
977 switch (dcl
.dcl_tess_domain
.domain
) {
978 case D3D_TESSELLATOR_DOMAIN_ISOLINE
:
979 info
.prop
.tp
.domain
= PIPE_PRIM_LINES
;
981 case D3D_TESSELLATOR_DOMAIN_TRI
:
982 info
.prop
.tp
.domain
= PIPE_PRIM_TRIANGLES
;
984 case D3D_TESSELLATOR_DOMAIN_QUAD
:
985 info
.prop
.tp
.domain
= PIPE_PRIM_QUADS
;
987 case D3D_TESSELLATOR_DOMAIN_UNDEFINED
:
989 info
.prop
.tp
.domain
= PIPE_PRIM_MAX
;
993 case SM4_OPCODE_DCL_TESS_PARTITIONING
:
994 switch (dcl
.dcl_tess_partitioning
.partitioning
) {
995 case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD
:
996 info
.prop
.tp
.partitioning
= NV50_TESS_PART_FRACT_ODD
;
998 case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN
:
999 info
.prop
.tp
.partitioning
= NV50_TESS_PART_FRACT_EVEN
;
1001 case D3D_TESSELLATOR_PARTITIONING_POW2
:
1002 info
.prop
.tp
.partitioning
= NV50_TESS_PART_POW2
;
1004 case D3D_TESSELLATOR_PARTITIONING_INTEGER
:
1005 case D3D_TESSELLATOR_PARTITIONING_UNDEFINED
:
1007 info
.prop
.tp
.partitioning
= NV50_TESS_PART_INTEGER
;
// Triangle output also records the winding: +1 for CW, -1 for CCW.
1011 case SM4_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE
:
1012 switch (dcl
.dcl_tess_output_primitive
.primitive
) {
1013 case D3D_TESSELLATOR_OUTPUT_LINE
:
1014 info
.prop
.tp
.outputPrim
= PIPE_PRIM_LINES
;
1016 case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CW
:
1017 info
.prop
.tp
.outputPrim
= PIPE_PRIM_TRIANGLES
;
1018 info
.prop
.tp
.winding
= +1;
1020 case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW
:
1021 info
.prop
.tp
.outputPrim
= PIPE_PRIM_TRIANGLES
;
1022 info
.prop
.tp
.winding
= -1;
1024 case D3D_TESSELLATOR_OUTPUT_POINT
:
1025 info
.prop
.tp
.outputPrim
= PIPE_PRIM_POINTS
;
1027 case D3D_TESSELLATOR_OUTPUT_UNDEFINED
:
1029 info
.prop
.tp
.outputPrim
= PIPE_PRIM_MAX
;
1034 case SM4_OPCODE_HS_FORK_PHASE
:
1038 case SM4_OPCODE_HS_JOIN_PHASE
:
1042 case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT
:
1043 case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT
:
1044 case SM4_OPCODE_DCL_HS_MAX_TESSFACTOR
:
// Declarations below are reported as unhandled.
1048 case SM4_OPCODE_DCL_THREAD_GROUP
:
1049 case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED
:
1050 case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW
:
1051 case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED
:
1052 case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW
:
1053 case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED
:
1054 case SM4_OPCODE_DCL_RESOURCE_RAW
:
1055 case SM4_OPCODE_DCL_RESOURCE_STRUCTURED
:
1056 ERROR("unhandled declaration\n");
1061 assert(!"invalid SM4 declaration");
// Set up the value storage: one DataArray per indexable temp array (nrArrays
// entries, each parented to this converter), the 32-bit and 64-bit views of
// the nrRegVals temporaries in FILE_GPR and, for fragment programs, the
// output array.
// NOTE(review): statements between the visible lines (for example an else
// branch for non-fragment programs) are not visible in this listing.
1068 Converter::allocateValues()
1070 lData
= new DataArray
[nrArrays
];
1072 for (unsigned int i
= 0; i
< nrArrays
; ++i
)
1073 lData
[i
].setParent(this);
1075 tData32
.setup(0, nrRegVals
, 4, 4, FILE_GPR
);
1076 tData64
.setup(0, nrRegVals
, 2, 8, FILE_GPR
);
1078 if (prog
->getType() == Program::TYPE_FRAGMENT
)
1079 oData
.setup(0, info
.numOutputs
, 4, 4, FILE_GPR
);
// Conversion step for one SM4 declaration. Only indexable temps and the
// hull shader phase declarations do work here; the remaining declarations
// were already handled in inspection.
// NOTE(review): braces, `break` lines, the subPhaseCnt updates and the
// return value are not visible in this listing.
1082 bool Converter::handleDeclaration(const sm4_dcl
& dcl
)
1084 switch (dcl
.opcode
) {
// Set up the next local-memory array at offset arrayVol, then advance
// arrayVol by num * comps * 4.
1085 case SM4_OPCODE_DCL_INDEXABLE_TEMP
:
1086 lData
[nrArrays
++].setup(arrayVol
,
1087 dcl
.indexable_temp
.num
, dcl
.indexable_temp
.comps
,
1088 4, FILE_MEMORY_LOCAL
);
1089 arrayVol
+= dcl
.indexable_temp
.num
* dcl
.indexable_temp
.comps
* 4;
// A new fork / join phase copies the instance count of the previous
// sub-phase into its own slot.
1091 case SM4_OPCODE_HS_FORK_PHASE
:
1093 phaseInstCnt
[0][subPhaseCnt
[0]] = phaseInstCnt
[0][subPhaseCnt
[0] - 1];
1096 case SM4_OPCODE_HS_JOIN_PHASE
:
1098 phaseInstCnt
[1][subPhaseCnt
[1]] = phaseInstCnt
[1][subPhaseCnt
[1] - 1];
// An explicit instance count overwrites the slot at subPhaseCnt - 1.
1101 case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT
:
1102 phaseInstCnt
[0][subPhaseCnt
[0] - 1] = dcl
.num
;
1104 case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT
:
1105 phaseInstCnt
[1][subPhaseCnt
[1] - 1] = dcl
.num
;
1109 break; // already handled in inspection
// Build the symbol for component c of shader input i. Regular inputs become
// a FILE_SHADER_INPUT symbol at byte offset slot[c] * 4; otherwise the
// input's semantic name is translated with tgsi::irSemantic() and a system
// value symbol is returned.
1116 Converter::iSym(int i
, int c
)
1118 if (info
.in
[i
].regular
) {
1119 return mkSymbol(FILE_SHADER_INPUT
, 0, sTy
, info
.in
[i
].slot
[c
] * 4);
1121 return mkSysVal(tgsi::irSemantic(info
.in
[i
].sn
), info
.in
[i
].si
);
// Build the symbol for component c of shader output i. Regular outputs
// become a FILE_SHADER_OUTPUT symbol at byte offset slot[c] * 4; otherwise
// the output's semantic name is translated with tgsi::irSemantic() and a
// system value symbol is returned.
1126 Converter::oSym(int i
, int c
)
1128 if (info
.out
[i
].regular
) {
1129 return mkSymbol(FILE_SHADER_OUTPUT
, 0, dTy
, info
.out
[i
].slot
[c
] * 4);
1131 return mkSysVal(tgsi::irSemantic(info
.out
[i
].sn
), info
.out
[i
].si
);
// Return the indirect address value for index dimension `dim` of source
// operand s, with the result stored in srcPtr[s][dim].
//
// s:   source operand number (nDstOpnds is added to index insn->ops).
// dim: index dimension of the operand.
// shl: shift applied to the index register value via OP_SHL.
// NOTE(review): the conditions selecting the early return, the unshifted
// store and the shifted store are not visible in this listing; presumably
// the first return is the cached-value path — TODO confirm.
1136 Converter::getSrcPtr(int s
, int dim
, int shl
)
1139 return srcPtr
[s
][dim
];
1141 sm4_op
*op
= insn
->ops
[s
+ nDstOpnds
]->indices
[dim
].reg
.get();
1146 Value
*index
= src(*op
, 0, s
);
1148 srcPtr
[s
][dim
] = index
;
1150 srcPtr
[s
][dim
] = mkOp2v(OP_SHL
, TYPE_U32
, getSSA(), index
, mkImm(shl
));
1151 return srcPtr
[s
][dim
];
// Return the indirect address value for index dimension `dim` of destination
// operand d: loads the index register, shifts it left by shl via OP_SHL, and
// stores the result in dstPtr[dim] before returning it.
// NOTE(review): early returns and the condition guarding the shift are not
// visible in this listing.
1155 Converter::getDstPtr(int d
, int dim
, int shl
)
1161 sm4_op
*op
= insn
->ops
[d
]->indices
[dim
].reg
.get();
1165 Value
*index
= src(*op
, 0, d
);
1167 index
= mkOp2v(OP_SHL
, TYPE_U32
, getSSA(), index
, mkImm(shl
));
1169 return (dstPtr
[dim
] = index
);
// Compute the vertex base address for source operand s into vtxBase[s]: the
// indirect pointer of dimension 0 is combined with the operand's first index
// displacement through OP_PFETCH.
// NOTE(review): early returns and the final return are not visible in this
// listing.
1173 Converter::getVtxPtr(int s
)
1179 sm4_op
*op
= insn
->ops
[s
+ nDstOpnds
].get();
1182 int idx
= op
->indices
[0].disp
;
1184 vtxBase
[s
] = getSrcPtr(s
, 0, 0);
1185 vtxBase
[s
] = mkOp2v(OP_PFETCH
, TYPE_U32
, getSSA(), mkImm(idx
), vtxBase
[s
]);
// Fetch component c of source operand i of the current instruction; source
// operands follow the nDstOpnds destination operands in insn->ops.
1190 Converter::src(int i
, int c
)
1192 return src(*insn
->ops
[i
+ nDstOpnds
], c
, i
);
// Fetch the value for component c of destination operand i of the current
// instruction.
1196 Converter::dst(int i
, int c
)
1198 return dst(*insn
->ops
[i
], c
, i
);
// Store `value` into component c of destination operand i of the current
// instruction, forwarding to the sm4_op overload.
// NOTE(review): the condition guarding the OP_SAT (saturate) below is not
// visible in this listing.
1202 Converter::saveDst(int i
, int c
, Value
*value
)
1205 mkOp1(OP_SAT
, dTy
, value
, value
);
1206 return saveDst(*insn
->ops
[i
], c
, value
, i
);
// Emit the interpolation instruction for component c of a fragment shader
// input operand and return its result value.
//
// op: input operand; its first index displacement selects the input and
//     swizzle[c] the component.
// c:  destination component.
// i:  source operand number, used for the indirect address lookup.
// NOTE(review): the declaration receiving the ternary below (used as `opr`)
// and the conditions guarding the pointer lookup / setIndirect are not
// visible in this listing.
1210 Converter::interpolate(const sm4_op
& op
, int c
, int i
)
1212 int idx
= op
.indices
[0].disp
;
1213 int swz
= op
.swizzle
[c
];
// Linear and flat inputs use OP_LINTERP, everything else OP_PINTERP.
1215 (info
.in
[idx
].linear
|| info
.in
[idx
].flat
) ? OP_LINTERP
: OP_PINTERP
;
1217 Value
*ptr
= getSrcPtr(i
, 0, 4);
1219 Instruction
*insn
= new_Instruction(func
, opr
, TYPE_F32
);
1221 insn
->setDef(0, getScratch());
1222 insn
->setSrc(0, iSym(idx
, swz
));
// OP_PINTERP takes fragCoord[3] as its second source.
1223 if (opr
== OP_PINTERP
)
1224 insn
->setSrc(1, fragCoord
[3]);
1226 insn
->setIndirect(0, 0, ptr
);
1228 insn
->setInterpolate(interpMode
[idx
]);
1230 bb
->insertTail(insn
);
1231 return insn
->getDef(0);
// src (sm4_op form): build the value for component c of source operand op
// (operand slot s), selecting the load strategy by the operand's register
// file (see the case labels below).
// NOTE(review): this extract is missing many original lines, including the
// switch header, the break statements, the declarations of idx/dim/off/ld and
// the conditions guarding several statements; presumably the switch is on
// op.file - confirm against the full source.
1235 Converter::src(const sm4_op
& op
, int c
, int s
)
1237 const int size
= typeSizeof(sTy
);
1240 Value
*res
, *ptr
, *vtx
;
// source component selected by the operand swizzle
1242 const int swz
= op
.swizzle
[c
];
1245 case SM4_FILE_IMMEDIATE32
:
1246 res
= loadImm(NULL
, (uint32_t)op
.imm_values
[swz
].u32
);
1248 case SM4_FILE_IMMEDIATE64
:
1250 res
= loadImm(NULL
, op
.imm_values
[swz
].u64
);
1253 assert(op
.is_index_simple(0));
1254 idx
= op
.indices
[0].disp
;
// NOTE(review): the condition choosing between the 64-bit and 32-bit
// temporary storage is not visible in this extract
1256 res
= tData64
.load(idx
, swz
, NULL
);
1258 res
= tData32
.load(idx
, swz
, NULL
);
1260 case SM4_FILE_INPUT
:
1261 case SM4_FILE_INPUT_CONTROL_POINT
:
1262 case SM4_FILE_INPUT_PATCH_CONSTANT
:
// fragment shader inputs are read through interpolate()
1263 if (prog
->getType() == Program::TYPE_FRAGMENT
)
1264 return interpolate(op
, c
, s
);
// patch constant inputs are indexed relative to
// numInputs - numPatchConstants
1267 if (op
.file
== SM4_FILE_INPUT_PATCH_CONSTANT
)
1268 idx
= info
.numInputs
- info
.numPatchConstants
;
// two indices: per-vertex access, attribute index is indices[1]
1270 if (op
.num_indices
== 2) {
1272 ptr
= getSrcPtr(s
, 1, 4);
1273 idx
+= op
.indices
[1].disp
;
1275 ld
= mkOp1(OP_VFETCH
, TYPE_U32
, res
, iSym(idx
, swz
));
1276 ld
->setIndirect(0, 0, ptr
);
1277 ld
->setIndirect(0, 1, vtx
);
1279 idx
+= op
.indices
[0].disp
;
1280 res
= mkLoad(sTy
, iSym(idx
, swz
), getSrcPtr(s
, 0, 4));
1282 if (op
.file
== SM4_FILE_INPUT_PATCH_CONSTANT
)
1283 res
->defs
->getInsn()->perPatch
= 1;
1285 case SM4_FILE_CONSTANT_BUFFER
:
1286 assert(op
.num_indices
== 2);
1287 assert(op
.is_index_simple(0));
1289 ptr
= getSrcPtr(s
, 1, 4);
// indices[0] selects the constant buffer, indices[1] the vector within it
1290 dim
= op
.indices
[0].disp
;
// byte offset: 4 components per vector, 8 bytes each for TYPE_F64, else 4
1291 off
= (op
.indices
[1].disp
* 4 + swz
) * (sTy
== TYPE_F64
? 8 : 4);
1293 res
= mkLoad(sTy
, mkSymbol(FILE_MEMORY_CONST
, dim
, sTy
, off
), ptr
);
1295 case SM4_FILE_IMMEDIATE_CONSTANT_BUFFER
:
1296 ptr
= getSrcPtr(s
, 0, 4);
1297 off
= (op
.indices
[0].disp
* 4 + swz
) * 4;
// the immediate constant buffer is read from FILE_MEMORY_CONST, file index 14
1298 res
= mkLoad(sTy
, mkSymbol(FILE_MEMORY_CONST
, 14, sTy
, off
), ptr
);
1300 case SM4_FILE_INDEXABLE_TEMP
:
1302 assert(op
.is_index_simple(0));
// indices[0] selects the array, indices[1] the element
1303 int a
= op
.indices
[0].disp
;
1304 idx
= op
.indices
[1].disp
;
1305 res
= lData
[a
].load(idx
, swz
, getSrcPtr(s
, 1, 4));
1308 case SM4_FILE_INPUT_PRIMITIVEID
:
1309 recordSV(TGSI_SEMANTIC_PRIMID
, 0, 1, true);
1310 res
= mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_PRIMITIVE_ID
, 0));
1312 case SM4_FILE_INPUT_GS_INSTANCE_ID
:
1313 case SM4_FILE_OUTPUT_CONTROL_POINT_ID
:
1314 recordSV(NV50_SEMANTIC_INVOCATIONID
, 0, 1, true);
1315 res
= mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_INVOCATION_ID
, 0));
1317 case SM4_FILE_CYCLE_COUNTER
:
// SV_CLOCK index 1 for any non-zero swizzle component, else index 0
1319 mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_CLOCK
, swz
? 1 : 0));
1321 case SM4_FILE_INPUT_FORK_INSTANCE_ID
:
1322 case SM4_FILE_INPUT_JOIN_INSTANCE_ID
:
1324 phaseInstanceUsed
= true;
// NOTE(review): the condition guarding this early return is not visible in
// this extract
1326 return loadImm(NULL
, phaseInstance
);
1327 const unsigned int cnt
= phaseInstCnt
[phase
- 1][subPhase
];
// invocation id clamped to cnt - 1
1329 res
= mkOp1v(OP_RDSV
, TYPE_U32
, res
, mkSysVal(SV_INVOCATION_ID
, 0));
1330 res
= mkOp2v(OP_MIN
, TYPE_U32
, res
, res
, loadImm(NULL
, cnt
- 1));
1333 case SM4_FILE_INPUT_DOMAIN_POINT
:
// domainPt[] holds the pre-fetched TessCoord values
1335 res
= domainPt
[swz
];
1337 case SM4_FILE_THREAD_GROUP_SHARED_MEMORY
:
1338 off
= (op
.indices
[0].disp
* 4 + swz
) * (sTy
== TYPE_F64
? 8 : 4);
1339 ptr
= getSrcPtr(s
, 0, 4);
1340 res
= mkLoad(sTy
, mkSymbol(FILE_MEMORY_SHARED
, 0, sTy
, off
), ptr
);
1342 case SM4_FILE_RESOURCE
:
1343 case SM4_FILE_SAMPLER
:
1344 case SM4_FILE_UNORDERED_ACCESS_VIEW
:
1346 case SM4_FILE_INPUT_THREAD_ID
:
1347 res
= mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_TID
, swz
));
1349 case SM4_FILE_INPUT_THREAD_GROUP_ID
:
1350 res
= mkOp1v(OP_RDSV
, TYPE_U32
, getSSA(), mkSysVal(SV_CTAID
, swz
));
1352 case SM4_FILE_FUNCTION_INPUT
:
1353 case SM4_FILE_INPUT_THREAD_ID_IN_GROUP
:
1354 assert(!"unhandled source file");
1357 assert(!"invalid source file");
// source modifiers; NOTE(review): the conditions selecting OP_ABS / OP_NEG
// are not visible in this extract
1362 res
= mkOp1v(OP_ABS
, sTy
, getSSA(res
->reg
.size
), res
);
1364 res
= mkOp1v(OP_NEG
, sTy
, getSSA(res
->reg
.size
), res
);
// dst (sm4_op form): return the value an instruction should write for
// component c of destination operand op.  Depending on the register file
// this is either storage acquired from tData32 / oData or a scratch value;
// files that cannot be written trip an assertion.
// NOTE(review): the switch header and the first case label(s) (original
// lines 1370-1372) are absent from this extract.
1369 Converter::dst(const sm4_op
&op
, int c
, int i
)
1373 return tData32
.acquire(op
.indices
[0].disp
, c
);
1374 case SM4_FILE_INDEXABLE_TEMP
:
1375 return getScratch();
1376 case SM4_FILE_OUTPUT
:
// fragment shader outputs are kept in oData; other program types get a
// scratch value
1377 if (prog
->getType() == Program::TYPE_FRAGMENT
)
1378 return oData
.acquire(op
.indices
[0].disp
, c
);
1379 return getScratch();
1382 case SM4_FILE_OUTPUT_DEPTH
:
1383 case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL
:
1384 case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL
:
1385 case SM4_FILE_OUTPUT_COVERAGE_MASK
:
1386 return getScratch();
1387 case SM4_FILE_IMMEDIATE32
:
1388 case SM4_FILE_IMMEDIATE64
:
1389 case SM4_FILE_CONSTANT_BUFFER
:
1390 case SM4_FILE_RESOURCE
:
1391 case SM4_FILE_SAMPLER
:
1392 case SM4_FILE_UNORDERED_ACCESS_VIEW
:
1393 assert(!"invalid destination file");
1396 assert(!"invalid file");
// saveFragDepth: store value as the fragment depth output (oData slot
// info.io.fragDepth, component 2).  For OP_MIN / OP_MAX the value is first
// combined with the incoming depth, read from SV_POSITION component 2.
// NOTE(review): the declaration of zIn (original line 1405) is absent from
// this extract.
1402 Converter::saveFragDepth(operation op
, Value
*value
)
1404 if (op
== OP_MIN
|| op
== OP_MAX
) {
1406 zIn
= mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_POSITION
, 2));
1407 value
= mkOp2v(op
, TYPE_F32
, getSSA(), value
, zIn
);
1409 oData
.store(info
.io
.fragDepth
, 2, NULL
, value
);
// saveDst (sm4_op form): commit value to component c of destination operand
// op (operand slot s), choosing the store path by the operand's register
// file (see the case labels below).
// NOTE(review): the switch header, the first case label(s), the break
// statements and the declarations of idx/a/sym/st are absent from this
// extract.
1413 Converter::saveDst(const sm4_op
&op
, int c
, Value
*value
, int s
)
1421 idx
= op
.indices
[0].disp
;
1422 tData32
.store(idx
, c
, NULL
, value
);
1424 case SM4_FILE_INDEXABLE_TEMP
:
// indices[0] selects the array, indices[1] the element
1425 a
= op
.indices
[0].disp
;
1426 idx
= op
.indices
[1].disp
;
1427 // FIXME: shift is wrong, depends on lData
1428 lData
[a
].store(idx
, c
, getDstPtr(s
, 1, 4), value
);
1430 case SM4_FILE_OUTPUT
:
1431 assert(op
.num_indices
== 1);
1432 idx
= op
.indices
[0].disp
;
// fragment shader outputs are collected in oData
1433 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
1434 oData
.store(idx
, c
, NULL
, value
);
// NOTE(review): the condition guarding this index adjustment is not visible
// in this extract
1437 idx
+= info
.numOutputs
- info
.numPatchConstants
;
// tess factor outputs use an index shift of 2, all other outputs 4
1438 const int shl
= (info
.out
[idx
].sn
== NV50_SEMANTIC_TESSFACTOR
) ? 2 : 4;
// regular outputs are exported, the rest written as system values
1440 if (sym
->reg
.file
== FILE_SHADER_OUTPUT
)
1441 st
= mkStore(OP_EXPORT
, dTy
, sym
, getDstPtr(s
, 0, shl
), value
);
1443 st
= mkStore(OP_WRSV
, dTy
, sym
, getDstPtr(s
, 0, 2), value
);
// stores issued while phase is non-zero are flagged per-patch
1444 st
->perPatch
= phase
? 1 : 0;
1447 case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL
:
1448 saveFragDepth(OP_MAX
, value
);
1450 case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL
:
1451 saveFragDepth(OP_MIN
, value
);
1453 case SM4_FILE_OUTPUT_DEPTH
:
1454 saveFragDepth(OP_NOP
, value
);
1456 case SM4_FILE_OUTPUT_COVERAGE_MASK
:
1457 oData
.store(info
.io
.sampleMask
, 0, NULL
, value
);
1459 case SM4_FILE_IMMEDIATE32
:
1460 case SM4_FILE_IMMEDIATE64
:
1461 case SM4_FILE_INPUT
:
1462 case SM4_FILE_CONSTANT_BUFFER
:
1463 case SM4_FILE_RESOURCE
:
1464 case SM4_FILE_SAMPLER
:
1465 assert(!"invalid destination file");
1468 assert(!"invalid file");
// emitTex: finish and insert texture instruction tex, then deliver its
// results to dst0[0..3] according to the resource swizzle swz.
// The result mask is built from the swizzle, one scratch def is allocated per
// enabled component, and for SM4_OPCODE_RESINFO the results are converted
// according to insn->insn.resinfo_return_type instead of being moved.
// NOTE(review): the declarations of c/d, several conditions (e.g. the ones
// guarding lines 1481 and 1519), the case labels of the switch and the
// statement after line 1491 are absent from this extract.
1474 Converter::emitTex(Value
*dst0
[4], TexInstruction
*tex
, const uint8_t swz
[4])
1476 Value
*res
[4] = { NULL
, NULL
, NULL
, NULL
};
// enable every texture component that the swizzle refers to
1479 for (c
= 0; c
< 4; ++c
)
1481 tex
->tex
.mask
|= 1 << swz
[c
];
// defs are packed: the d-th def receives the d-th enabled component
1482 for (d
= 0, c
= 0; c
< 4; ++c
)
1483 if (tex
->tex
.mask
& (1 << c
))
1484 tex
->setDef(d
++, (res
[c
] = getScratch()));
1486 bb
->insertTail(tex
);
1488 if (insn
->opcode
== SM4_OPCODE_RESINFO
) {
// components the target does not have are reported as 0
1489 if (tex
->tex
.target
.getDim() == 1) {
1490 res
[2] = loadImm(NULL
, 0);
1491 if (!tex
->tex
.target
.isArray())
1494 if (tex
->tex
.target
.getDim() == 2 && !tex
->tex
.target
.isArray()) {
1495 res
[2] = loadImm(NULL
, 0);
1497 for (c
= 0; c
< 4; ++c
) {
1500 Value
*src
= res
[swz
[c
]];
1502 switch (insn
->insn
.resinfo_return_type
) {
1504 mkCvt(OP_CVT
, TYPE_F32
, dst0
[c
], TYPE_U32
, src
);
// converted to float, then reciprocal for components below the target's
// dimensionality
1507 mkCvt(OP_CVT
, TYPE_F32
, dst0
[c
], TYPE_U32
, src
);
1508 if (swz
[c
] < tex
->tex
.target
.getDim())
1509 mkOp1(OP_RCP
, TYPE_F32
, dst0
[c
], dst0
[c
]);
1512 mkMov(dst0
[c
], src
);
// all other opcodes: plain swizzled copy
1517 for (c
= 0; c
< 4; ++c
)
1519 mkMov(dst0
[c
], res
[swz
[c
]]);
// handleQUERY: emit an OP_TXQ texture query of kind query and route its
// results to dst0 through emitTex.
// For TXQ_DIMS the resource is raw operand 2 and the argument is source 0;
// for every other query the resource is raw operand 1 and the argument is
// source 1.
1524 Converter::handleQUERY(Value
*dst0
[4], enum TexQuery query
)
1526 TexInstruction
*texi
= new_TexInstruction(func
, OP_TXQ
);
1527 texi
->tex
.query
= query
;
1529 assert(insn
->ops
[2]->file
== SM4_FILE_RESOURCE
); // TODO: UAVs
1531 const int rOp
= (query
== TXQ_DIMS
) ? 2 : 1;
1532 const int sOp
= (query
== TXQ_DIMS
) ? 0 : 1;
// resource slot taken from the operand's first index
1534 const int tR
= insn
->ops
[rOp
]->indices
[0].disp
;
1536 texi
->setTexture(resourceType
[tR
][0], tR
, 0);
1538 texi
->setSrc(0, src(sOp
, 0)); // mip level or sample index
1540 emitTex(dst0
, texi
, insn
->ops
[rOp
]->swizzle
);
// handleLOAD: emit an OP_TXF texel fetch from the resource named by raw
// operand 2, using the components of source 0 as arguments, and route the
// results to dst0 through emitTex.
// NOTE(review): the declaration of c and the else-branch structure around
// lines 1557-1563 are absent from this extract.
1544 Converter::handleLOAD(Value
*dst0
[4])
1546 TexInstruction
*texi
= new_TexInstruction(func
, OP_TXF
);
1549 const int tR
= insn
->ops
[2]->indices
[0].disp
;
1551 texi
->setTexture(resourceType
[tR
][0], tR
, 0);
// one source per argument of the texture target
1553 for (c
= 0; c
< texi
->tex
.target
.getArgCount(); ++c
)
1554 texi
->setSrc(c
, src(0, c
));
1556 if (texi
->tex
.target
== TEX_TARGET_BUFFER
) {
1557 texi
->tex
.levelZero
= true;
// component 3 of source 0 is appended as an extra source
1559 texi
->setSrc(c
++, src(0, 3));
// immediate texel offsets from the instruction
1560 for (c
= 0; c
< 3; ++c
) {
1561 texi
->tex
.offset
[0][c
] = insn
->sample_offset
[c
];
1562 if (texi
->tex
.offset
[0][c
])
1563 texi
->tex
.useOffsets
= 1;
1567 emitTex(dst0
, texi
, insn
->ops
[2]->swizzle
);
1570 // order of nv50 ir sources: x y z/layer lod/bias dc
// handleSAMPLE: emit texture sampling instruction opr for the current SM4
// sample opcode and route its results to dst0 through emitTex.
// Raw operand 2 names the resource and raw operand 3 the sampler; the shadow
// variant of the resource's target is used when the sampler is flagged in
// shadow[].
// NOTE(review): the declarations of c/s/val, the bodies of the
// SAMPLE_L/SAMPLE_B and SAMPLE_C/SAMPLE_C_LZ branches (where lod and dc are
// presumably assigned) and the conditions guarding lines 1606-1613, 1619 and
// 1621 are absent from this extract.
1572 Converter::handleSAMPLE(operation opr
, Value
*dst0
[4])
1574 TexInstruction
*texi
= new_TexInstruction(func
, opr
);
1576 Value
*arg
[4], *src0
[4];
1578 Value
*lod
= NULL
, *dc
= NULL
;
1580 const int tR
= insn
->ops
[2]->indices
[0].disp
;
1581 const int tS
= insn
->ops
[3]->indices
[0].disp
;
1583 TexInstruction::Target tgt
= resourceType
[tR
][shadow
[tS
] ? 1 : 0];
// coordinates: one component of source 0 per target argument
1585 for (c
= 0; c
< tgt
.getArgCount(); ++c
)
1586 arg
[c
] = src0
[c
] = src(0, c
);
1588 if (insn
->opcode
== SM4_OPCODE_SAMPLE_L
||
1589 insn
->opcode
== SM4_OPCODE_SAMPLE_B
) {
1592 if (insn
->opcode
== SM4_OPCODE_SAMPLE_C
||
1593 insn
->opcode
== SM4_OPCODE_SAMPLE_C_LZ
) {
1595 if (insn
->opcode
== SM4_OPCODE_SAMPLE_C_LZ
)
1596 texi
->tex
.levelZero
= true;
// explicit derivatives come from sources 3 and 4
1598 if (insn
->opcode
== SM4_OPCODE_SAMPLE_D
) {
1599 for (c
= 0; c
< tgt
.getDim(); ++c
) {
1600 texi
->dPdx
[c
] = src(3, c
);
1601 texi
->dPdy
[c
] = src(4, c
);
// scale the first three coordinates by 1 / max(|x|, |y|, |z|)
1606 for (c
= 0; c
< 3; ++c
)
1607 src0
[c
] = mkOp1v(OP_ABS
, TYPE_F32
, getSSA(), arg
[c
]);
1609 mkOp2(OP_MAX
, TYPE_F32
, val
, src0
[0], src0
[1]);
1610 mkOp2(OP_MAX
, TYPE_F32
, val
, src0
[2], val
);
1611 mkOp1(OP_RCP
, TYPE_F32
, val
, val
);
1612 for (c
= 0; c
< 3; ++c
)
1613 src0
[c
] = mkOp2v(OP_MUL
, TYPE_F32
, getSSA(), arg
[c
], val
);
// source order: coordinates, then lod/bias, then depth compare value
1616 for (s
= 0; s
< tgt
.getArgCount(); ++s
)
1617 texi
->setSrc(s
, src0
[s
]);
1619 texi
->setSrc(s
++, lod
);
1621 texi
->setSrc(s
++, dc
);
// immediate texel offsets from the instruction
1623 for (c
= 0; c
< 3; ++c
) {
1624 texi
->tex
.offset
[0][c
] = insn
->sample_offset
[c
];
1625 if (texi
->tex
.offset
[0][c
])
1626 texi
->tex
.useOffsets
= 1;
1629 texi
->setTexture(tgt
, tR
, tS
);
1631 emitTex(dst0
, texi
, insn
->ops
[2]->swizzle
);
// handleDP: compute the dim-component dot product of sources 0 and 1 into a
// scratch value: one OP_MUL for component 0 followed by an OP_MAD per
// remaining component.
// NOTE(review): the body of the final loop over the four destination
// components (original lines 1647 onwards) is absent from this extract.
1635 Converter::handleDP(Value
*dst0
[4], int dim
)
1637 Value
*src0
= src(0, 0), *src1
= src(1, 0);
1638 Value
*dotp
= getScratch();
1642 mkOp2(OP_MUL
, TYPE_F32
, dotp
, src0
, src1
);
// accumulate the remaining components
1643 for (int c
= 1; c
< dim
; ++c
)
1644 mkOp3(OP_MAD
, TYPE_F32
, dotp
, src(0, c
), src(1, c
), dotp
);
1646 for (int c
= 0; c
< 4; ++c
)
// insertConvergenceOps: place an OP_JOIN at the head of block conv and an
// OP_JOINAT targeting conv in block fork, immediately before fork's exit
// instruction; the OP_JOINAT is also recorded in fork->joinAt.
// NOTE(review): original lines 1654 and 1656 are absent from this extract.
1651 Converter::insertConvergenceOps(BasicBlock
*conv
, BasicBlock
*fork
)
1653 FlowInstruction
*join
= new_FlowInstruction(func
, OP_JOIN
, NULL
);
1655 conv
->insertHead(join
);
1657 fork
->joinAt
= new_FlowInstruction(func
, OP_JOINAT
, conv
);
1658 fork
->insertBefore(fork
->getExit(), fork
->joinAt
);
// finalizeShader: pop the epilogue block from leaveBBs, attach it to the
// current block with a TREE edge, continue emission there and terminate the
// shader with OP_EXIT.
// NOTE(review): several lines are absent from this extract, including the
// statement executed for fragment programs after line 1674.
1662 Converter::finalizeShader()
1666 BasicBlock
*epilogue
= reinterpret_cast<BasicBlock
*>(leaveBBs
.pop().u
.p
);
1671 bb
->cfg
.attach(&epilogue
->cfg
, Graph::Edge::TREE
);
1672 setPosition(epilogue
, true);
1674 if (prog
->getType() == Program::TYPE_FRAGMENT
)
// the exit instruction is flagged as the terminator
1677 mkOp(OP_EXIT
, TYPE_NONE
, NULL
)->terminator
= 1;
// Loop header: runs the following statement once for each of the 4 channels
// whose bit is set in the write mask of destination operand 0.
1680 #define FOR_EACH_DST0_ENABLED_CHANNEL32(chan) \
1681 for ((chan) = 0; (chan) < 4; ++(chan)) \
1682 if (insn->ops[0].get()->mask & (1 << (chan)))
// Same as above, but only iterates over channels 0 and 1.
1684 #define FOR_EACH_DST0_ENABLED_CHANNEL64(chan) \
1685 for ((chan) = 0; (chan) < 2; ++(chan)) \
1686 if (insn->ops[0].get()->mask & (1 << (chan)))
// checkDstSrcAliasing: compare every destination operand of the current
// instruction with every source operand, looking for pairs in the same
// register file with the same number of indices whose last index is simple
// and has the same displacement.
// NOTE(review): the statements executed when a test matches or fails
// (continue / return) are absent from this extract; handleInstruction stores
// the result in a bool named useScratchDst.
1689 Converter::checkDstSrcAliasing() const
1691 for (unsigned int d
= 0; d
< nDstOpnds
; ++d
) {
// source operands start at index nDstOpnds
1692 for (unsigned int s
= nDstOpnds
; s
< insn
->num_ops
; ++s
) {
1693 if (insn
->ops
[d
]->file
!= insn
->ops
[s
]->file
)
// i is the position of the last index of the source operand
1695 int i
= insn
->ops
[s
]->num_indices
- 1;
1696 if (i
!= insn
->ops
[d
]->num_indices
- 1)
1698 if (insn
->ops
[d
]->is_index_simple(i
) &&
1699 insn
->ops
[s
]->is_index_simple(i
) &&
1700 insn
->ops
[d
]->indices
[i
].disp
== insn
->ops
[s
]->indices
[i
].disp
)
// handleInstruction: translate the SM4 instruction at index pos of sm4.insns
// into nv50 IR.
// It derives the nv50 operation and the source/destination types from the
// opcode, acquires the destination values, dispatches on the SM4 opcode,
// then stores the results and clears the srcPtr/dstPtr/vtxBase caches.
// NOTE(review): this extract is missing many original lines (break
// statements, some case labels, several conditions and local declarations),
// so individual cases below may look incomplete.
1708 Converter::handleInstruction(unsigned int pos
)
1710 Value
*dst0
[4], *rDst0
[4];
1711 Value
*dst1
[4], *rDst1
[4];
1714 insn
= sm4
.insns
[pos
];
1715 enum sm4_opcode opcode
= static_cast<sm4_opcode
>(insn
->opcode
);
1717 operation op
= cvtOpcode(opcode
);
1719 sTy
= inferSrcType(opcode
);
1720 dTy
= inferDstType(opcode
);
// 64-bit destinations have 2 components, everything else 4
1722 nc
= dTy
== TYPE_F64
? 2 : 4;
1724 nDstOpnds
= getDstOpndCount(opcode
);
1726 bool useScratchDst
= checkDstSrcAliasing();
1728 INFO("SM4_OPCODE_##%u, aliasing = %u\n", insn
->opcode
, useScratchDst
);
// first destination: one value per component enabled in the write mask,
// NULL otherwise; rDst0 keeps the real destination values
1730 if (nDstOpnds
>= 1) {
1731 for (c
= 0; c
< nc
; ++c
)
1732 rDst0
[c
] = dst0
[c
] =
1733 insn
->ops
[0].get()->mask
& (1 << c
) ? dst(0, c
) : NULL
;
// NOTE(review): the condition guarding this loop is not visible in this
// extract; it replaces dst0 with scratch values wherever rDst0 is set
1735 for (c
= 0; c
< nc
; ++c
)
1736 dst0
[c
] = rDst0
[c
] ? getScratch() : NULL
;
// second destination, handled the same way
1739 if (nDstOpnds
>= 2) {
1740 for (c
= 0; c
< nc
; ++c
)
1741 rDst1
[c
] = dst1
[c
] =
1742 insn
->ops
[1].get()->mask
& (1 << c
) ? dst(1, c
) : NULL
;
1744 for (c
= 0; c
< nc
; ++c
)
1745 dst1
[c
] = rDst1
[c
] ? getScratch() : NULL
;
1748 switch (insn
->opcode
) {
// two-source per-component operations
1749 case SM4_OPCODE_ADD
:
1750 case SM4_OPCODE_AND
:
1751 case SM4_OPCODE_DIV
:
1752 case SM4_OPCODE_IADD
:
1753 case SM4_OPCODE_IMAX
:
1754 case SM4_OPCODE_IMIN
:
1755 case SM4_OPCODE_MIN
:
1756 case SM4_OPCODE_MAX
:
1757 case SM4_OPCODE_MUL
:
1759 case SM4_OPCODE_UMAX
:
1760 case SM4_OPCODE_UMIN
:
1761 case SM4_OPCODE_XOR
:
1762 FOR_EACH_DST0_ENABLED_CHANNEL32(c
) {
1763 Instruction
*insn
= mkOp2(op
, dTy
, dst0
[c
], src(0, c
), src(1, c
));
1764 if (dTy
== TYPE_F32
)
// shifts use the wrapping sub-operation
1769 case SM4_OPCODE_ISHL
:
1770 case SM4_OPCODE_ISHR
:
1771 case SM4_OPCODE_USHR
:
1772 FOR_EACH_DST0_ENABLED_CHANNEL32(c
) {
1773 Instruction
*insn
= mkOp2(op
, dTy
, dst0
[c
], src(0, c
), src(1, c
));
1774 insn
->subOp
= NV50_IR_SUBOP_SHIFT_WRAP
;
1778 case SM4_OPCODE_IMAD
:
1779 case SM4_OPCODE_MAD
:
1780 case SM4_OPCODE_UMAD
:
1781 FOR_EACH_DST0_ENABLED_CHANNEL32(c
) {
1782 mkOp3(OP_MAD
, dTy
, dst0
[c
], src(0, c
), src(1, c
), src(2, c
));
// double precision two-source operations (2 channels)
1786 case SM4_OPCODE_DADD
:
1787 case SM4_OPCODE_DMAX
:
1788 case SM4_OPCODE_DMIN
:
1789 case SM4_OPCODE_DMUL
:
1790 FOR_EACH_DST0_ENABLED_CHANNEL64(c
) {
1791 mkOp2(op
, dTy
, dst0
[c
], src(0, c
), src(1, c
));
// UDIV: quotient goes to destination 0, remainder to destination 1
1795 case SM4_OPCODE_UDIV
:
1796 for (c
= 0; c
< 4; ++c
) {
1798 if (dst0
[c
] || dst1
[c
]) {
1803 mkOp2(OP_DIV
, TYPE_U32
, dst0
[c
], dvn
, dvs
);
1805 mkOp2(OP_MOD
, TYPE_U32
, dst1
[c
], dvn
, dvs
);
// IMUL/UMUL: high part goes to destination 0, low part to destination 1
1809 case SM4_OPCODE_IMUL
:
1810 case SM4_OPCODE_UMUL
:
1811 for (c
= 0; c
< 4; ++c
) {
1813 if (dst0
[c
] || dst1
[c
]) {
1818 mkOp2(OP_MUL
, dTy
, dst0
[c
], a
, b
)->subOp
=
1819 NV50_IR_SUBOP_MUL_HIGH
;
1821 mkOp2(OP_MUL
, dTy
, dst1
[c
], a
, b
);
1825 case SM4_OPCODE_DP2
:
1828 case SM4_OPCODE_DP3
:
1831 case SM4_OPCODE_DP4
:
// one-source per-component operations
1835 case SM4_OPCODE_DERIV_RTX
:
1836 case SM4_OPCODE_DERIV_RTX_COARSE
:
1837 case SM4_OPCODE_DERIV_RTX_FINE
:
1838 case SM4_OPCODE_DERIV_RTY
:
1839 case SM4_OPCODE_DERIV_RTY_COARSE
:
1840 case SM4_OPCODE_DERIV_RTY_FINE
:
1841 case SM4_OPCODE_MOV
:
1842 case SM4_OPCODE_INEG
:
1843 case SM4_OPCODE_NOT
:
1844 case SM4_OPCODE_SQRT
:
1845 case SM4_OPCODE_COUNTBITS
:
1846 case SM4_OPCODE_EXP
:
1847 case SM4_OPCODE_LOG
:
1848 case SM4_OPCODE_RCP
:
1849 FOR_EACH_DST0_ENABLED_CHANNEL32(c
) {
1850 mkOp1(op
, dTy
, dst0
[c
], src(0, c
));
// FRC: x - floor(x)
1854 case SM4_OPCODE_FRC
:
1855 FOR_EACH_DST0_ENABLED_CHANNEL32(c
) {
1856 Value
*val
= getScratch();
1857 Value
*src0
= src(0, c
);
1858 mkOp1(OP_FLOOR
, TYPE_F32
, val
, src0
);
1859 mkOp2(OP_SUB
, TYPE_F32
, dst0
[c
], src0
, val
);
1863 case SM4_OPCODE_MOVC
:
1864 FOR_EACH_DST0_ENABLED_CHANNEL32(c
)
1865 mkCmp(OP_SLCT
, CC_NE
, TYPE_U32
, dst0
[c
], src(1, c
), src(2, c
),
// rounding operations: the rounding mode is derived from the opcode
1869 case SM4_OPCODE_ROUND_NE
:
1870 case SM4_OPCODE_ROUND_NI
:
1871 case SM4_OPCODE_ROUND_PI
:
1872 case SM4_OPCODE_ROUND_Z
:
1873 FOR_EACH_DST0_ENABLED_CHANNEL32(c
) {
1874 Instruction
*rnd
= mkOp1(op
, dTy
, dst0
[c
], src(0, c
));
1876 rnd
->rnd
= cvtRoundingMode(opcode
);
1880 case SM4_OPCODE_RSQ
:
1881 FOR_EACH_DST0_ENABLED_CHANNEL32(c
)
1882 mkOp1(op
, dTy
, dst0
[c
], src(0, c
));
// SINCOS: sine goes to destination 0, cosine to destination 1
1885 case SM4_OPCODE_SINCOS
:
1886 for (c
= 0; c
< 4; ++c
) {
1887 if (!dst0
[c
] && !dst1
[c
])
1889 Value
*val
= mkOp1v(OP_PRESIN
, TYPE_F32
, getScratch(), src(0, c
));
1891 mkOp1(OP_SIN
, TYPE_F32
, dst0
[c
], val
);
1893 mkOp1(OP_COS
, TYPE_F32
, dst1
[c
], val
);
// comparisons: the condition code is derived from the opcode
1899 case SM4_OPCODE_IEQ
:
1900 case SM4_OPCODE_IGE
:
1901 case SM4_OPCODE_ILT
:
1904 case SM4_OPCODE_INE
:
1905 case SM4_OPCODE_ULT
:
1906 case SM4_OPCODE_UGE
:
1907 case SM4_OPCODE_DEQ
:
1908 case SM4_OPCODE_DGE
:
1909 case SM4_OPCODE_DLT
:
1910 case SM4_OPCODE_DNE
:
1912 CondCode cc
= cvtCondCode(opcode
);
1913 FOR_EACH_DST0_ENABLED_CHANNEL32(c
) {
1914 CmpInstruction
*set
;
1915 set
= mkCmp(op
, cc
, sTy
, dst0
[c
], src(0, c
), src(1, c
), NULL
);
1916 set
->setType(dTy
, sTy
);
1917 if (sTy
== TYPE_F32
)
// float to integer conversions round towards zero
1923 case SM4_OPCODE_FTOI
:
1924 case SM4_OPCODE_FTOU
:
1925 FOR_EACH_DST0_ENABLED_CHANNEL32(c
)
1926 mkCvt(op
, dTy
, dst0
[c
], sTy
, src(0, c
))->rnd
= ROUND_Z
;
1928 case SM4_OPCODE_ITOF
:
1929 case SM4_OPCODE_UTOF
:
1930 case SM4_OPCODE_F32TOF16
:
1931 case SM4_OPCODE_F16TOF32
:
1932 case SM4_OPCODE_DTOF
:
1933 case SM4_OPCODE_FTOD
:
1934 FOR_EACH_DST0_ENABLED_CHANNEL32(c
)
1935 mkCvt(op
, dTy
, dst0
[c
], sTy
, src(0, c
));
// geometry shader stream control
1938 case SM4_OPCODE_CUT
:
1939 case SM4_OPCODE_CUT_STREAM
:
1940 mkOp1(OP_RESTART
, TYPE_U32
, NULL
, mkImm(0))->fixed
= 1;
1942 case SM4_OPCODE_EMIT
:
1943 case SM4_OPCODE_EMIT_STREAM
:
1944 mkOp1(OP_EMIT
, TYPE_U32
, NULL
, mkImm(0))->fixed
= 1;
1946 case SM4_OPCODE_EMITTHENCUT
:
1947 case SM4_OPCODE_EMITTHENCUT_STREAM
:
1949 Instruction
*cut
= mkOp1(OP_EMIT
, TYPE_U32
, NULL
, mkImm(0));
1951 cut
->subOp
= NV50_IR_SUBOP_EMIT_RESTART
;
// DISCARD is predicated on source 0; test_nz selects the polarity
1955 case SM4_OPCODE_DISCARD
:
1956 info
.prop
.fp
.usesDiscard
= TRUE
;
1957 mkOp(OP_DISCARD
, TYPE_NONE
, NULL
)->setPredicate(
1958 insn
->insn
.test_nz
? CC_P
: CC_NOT_P
, src(0, 0));
1961 case SM4_OPCODE_CALL
:
1962 case SM4_OPCODE_CALLC
:
1963 assert(!"CALL/CALLC not implemented");
1966 case SM4_OPCODE_RET
:
1967 // XXX: the following doesn't work with subroutines / early ret
1968 if (!haveNextPhase(pos
))
1971 phaseEnded
= phase
+ 1;
// NOTE(review): the case label for the following block (original lines
// 1974-1975) is absent from this extract; it opens a conditional clause and
// branches around it when the test on source 0 fails
1976 BasicBlock
*ifClause
= new BasicBlock(func
);
1978 bb
->cfg
.attach(&ifClause
->cfg
, Graph::Edge::TREE
);
1982 mkFlow(OP_BRA
, NULL
, insn
->insn
.test_nz
? CC_NOT_P
: CC_P
, src(0, 0));
1984 setPosition(ifClause
, true);
1987 case SM4_OPCODE_ELSE
:
1989 BasicBlock
*elseClause
= new BasicBlock(func
);
1990 BasicBlock
*forkPoint
= reinterpret_cast<BasicBlock
*>(condBBs
.pop().u
.p
);
1992 forkPoint
->cfg
.attach(&elseClause
->cfg
, Graph::Edge::TREE
);
// the branch at the end of the fork block now targets the else clause
1995 forkPoint
->getExit()->asFlow()->target
.bb
= elseClause
;
1996 if (!bb
->isTerminated())
1997 mkFlow(OP_BRA
, NULL
, CC_ALWAYS
, NULL
);
1999 setPosition(elseClause
, true);
2002 case SM4_OPCODE_ENDIF
:
2004 BasicBlock
*convPoint
= new BasicBlock(func
);
2005 BasicBlock
*lastBB
= reinterpret_cast<BasicBlock
*>(condBBs
.pop().u
.p
);
2006 BasicBlock
*forkPoint
= reinterpret_cast<BasicBlock
*>(joinBBs
.pop().u
.p
);
2008 if (!bb
->isTerminated()) {
2009 // we only want join if none of the clauses ended with CONT/BREAK/RET
2010 if (lastBB
->getExit()->op
== OP_BRA
&& joinBBs
.getSize() < 6)
2011 insertConvergenceOps(convPoint
, forkPoint
);
2012 mkFlow(OP_BRA
, convPoint
, CC_ALWAYS
, NULL
);
2013 bb
->cfg
.attach(&convPoint
->cfg
, Graph::Edge::FORWARD
);
// a pending branch in the other clause is redirected to the convergence block
2016 if (lastBB
->getExit()->op
== OP_BRA
) {
2017 lastBB
->cfg
.attach(&convPoint
->cfg
, Graph::Edge::FORWARD
);
2018 lastBB
->getExit()->asFlow()->target
.bb
= convPoint
;
2020 setPosition(convPoint
, true);
2024 case SM4_OPCODE_SWITCH
:
2025 case SM4_OPCODE_CASE
:
2026 case SM4_OPCODE_ENDSWITCH
:
2027 assert(!"SWITCH/CASE/ENDSWITCH not implemented");
2030 case SM4_OPCODE_LOOP
:
2032 BasicBlock
*loopHeader
= new BasicBlock(func
);
2033 BasicBlock
*loopBreak
= new BasicBlock(func
);
2035 loopBBs
.push(loopHeader
);
2036 breakBBs
.push(loopBreak
);
// track the deepest loop nesting seen so far
2037 if (loopBBs
.getSize() > func
->loopNestingBound
)
2038 func
->loopNestingBound
++;
2040 mkFlow(OP_PREBREAK
, loopBreak
, CC_ALWAYS
, NULL
);
2042 bb
->cfg
.attach(&loopHeader
->cfg
, Graph::Edge::TREE
);
2043 setPosition(loopHeader
, true);
2044 mkFlow(OP_PRECONT
, loopHeader
, CC_ALWAYS
, NULL
);
2047 case SM4_OPCODE_ENDLOOP
:
2049 BasicBlock
*loopBB
= reinterpret_cast<BasicBlock
*>(loopBBs
.pop().u
.p
);
2051 if (!bb
->isTerminated()) {
2052 mkFlow(OP_CONT
, loopBB
, CC_ALWAYS
, NULL
);
2053 bb
->cfg
.attach(&loopBB
->cfg
, Graph::Edge::BACK
);
// emission continues in the loop's break block
2055 setPosition(reinterpret_cast<BasicBlock
*>(breakBBs
.pop().u
.p
), true);
2058 case SM4_OPCODE_BREAK
:
2060 if (bb
->isTerminated())
2062 BasicBlock
*breakBB
= reinterpret_cast<BasicBlock
*>(breakBBs
.peek().u
.p
);
2063 mkFlow(OP_BREAK
, breakBB
, CC_ALWAYS
, NULL
);
2064 bb
->cfg
.attach(&breakBB
->cfg
, Graph::Edge::CROSS
);
2067 case SM4_OPCODE_BREAKC
:
2069 BasicBlock
*nextBB
= new BasicBlock(func
);
2070 BasicBlock
*breakBB
= reinterpret_cast<BasicBlock
*>(breakBBs
.peek().u
.p
);
2071 CondCode cc
= insn
->insn
.test_nz
? CC_P
: CC_NOT_P
;
2072 mkFlow(OP_BREAK
, breakBB
, cc
, src(0, 0));
2073 bb
->cfg
.attach(&breakBB
->cfg
, Graph::Edge::CROSS
);
2074 bb
->cfg
.attach(&nextBB
->cfg
, Graph::Edge::FORWARD
);
2075 setPosition(nextBB
, true);
2078 case SM4_OPCODE_CONTINUE
:
2080 if (bb
->isTerminated())
2082 BasicBlock
*contBB
= reinterpret_cast<BasicBlock
*>(loopBBs
.peek().u
.p
);
2083 mkFlow(OP_CONT
, contBB
, CC_ALWAYS
, NULL
);
2084 contBB
->explicitCont
= true;
2085 bb
->cfg
.attach(&contBB
->cfg
, Graph::Edge::BACK
);
2088 case SM4_OPCODE_CONTINUEC
:
2090 BasicBlock
*nextBB
= new BasicBlock(func
);
2091 BasicBlock
*contBB
= reinterpret_cast<BasicBlock
*>(loopBBs
.peek().u
.p
);
2092 mkFlow(OP_CONT
, contBB
, insn
->insn
.test_nz
? CC_P
: CC_NOT_P
, src(0, 0));
2093 bb
->cfg
.attach(&contBB
->cfg
, Graph::Edge::BACK
);
2094 bb
->cfg
.attach(&nextBB
->cfg
, Graph::Edge::FORWARD
);
2095 setPosition(nextBB
, true);
// texture operations
2099 case SM4_OPCODE_SAMPLE
:
2100 case SM4_OPCODE_SAMPLE_C
:
2101 case SM4_OPCODE_SAMPLE_C_LZ
:
2102 case SM4_OPCODE_SAMPLE_L
:
2103 case SM4_OPCODE_SAMPLE_D
:
2104 case SM4_OPCODE_SAMPLE_B
:
2105 handleSAMPLE(op
, dst0
);
2108 case SM4_OPCODE_LD_MS
:
2112 case SM4_OPCODE_GATHER4
:
2113 assert(!"GATHER4 not implemented\n");
2116 case SM4_OPCODE_RESINFO
:
2117 handleQUERY(dst0
, TXQ_DIMS
);
2119 case SM4_OPCODE_SAMPLE_POS
:
2120 handleQUERY(dst0
, TXQ_SAMPLE_POSITION
);
2123 case SM4_OPCODE_NOP
:
2124 mkOp(OP_NOP
, TYPE_NONE
, NULL
);
// hull shader phase markers
2127 case SM4_OPCODE_HS_DECLS
:
2128 // XXX: any significance ?
2130 case SM4_OPCODE_HS_CONTROL_POINT_PHASE
:
2133 case SM4_OPCODE_HS_FORK_PHASE
:
// re-entering the same phase start advances the instance, otherwise restart
// at instance 0
2137 phaseInstance
= (phaseStart
== pos
) ? (phaseInstance
+ 1) : 0;
2139 if (info
.prop
.tp
.outputPatchSize
< phaseInstCnt
[0][subPhase
])
2142 case SM4_OPCODE_HS_JOIN_PHASE
:
2146 phaseInstance
= (phaseStart
== pos
) ? (phaseInstance
+ 1) : 0;
2148 if (info
.prop
.tp
.outputPatchSize
< phaseInstCnt
[1][subPhase
])
2153 ERROR("SM4_OPCODE_#%u illegal / not supported\n", insn
->opcode
);
// write-back: copy scratch results into the real destinations, then store
2158 for (c
= 0; c
< nc
; ++c
) {
2159 if (nDstOpnds
>= 1 && rDst0
[c
]) {
2160 if (dst0
[c
] != rDst0
[c
])
2161 mkMov(rDst0
[c
], dst0
[c
]);
2162 saveDst(0, c
, rDst0
[c
]);
2164 if (nDstOpnds
>= 2 && rDst1
[c
]) {
2165 if (dst1
[c
] != rDst1
[c
])
2166 mkMov(rDst1
[c
], dst1
[c
]);
2167 saveDst(1, c
, rDst1
[c
]);
// reset the per-instruction pointer caches
2171 memset(srcPtr
, 0, sizeof(srcPtr
));
2172 memset(dstPtr
, 0, sizeof(dstPtr
));
2173 memset(vtxBase
, 0, sizeof(vtxBase
));
// exportOutputs: for every output i and component c tracked in oData, load
// the value and emit an OP_EXPORT store of type TYPE_F32 to
// FILE_SHADER_OUTPUT at byte offset info.out[i].slot[c] * 4.
// NOTE(review): the statement executed when oData.exists() is false
// (original line 2183) is absent from this extract.
2178 Converter::exportOutputs()
2180 for (int i
= 0; i
< info
.numOutputs
; ++i
) {
2181 for (int c
= 0; c
< 4; ++c
) {
2182 if (!oData
.exists(i
, c
))
2184 Symbol
*sym
= mkSymbol(FILE_SHADER_OUTPUT
, 0, TYPE_F32
,
2185 info
.out
[i
].slot
[c
] * 4);
2186 Value
*val
= oData
.load(i
, c
, NULL
);
2188 mkStore(OP_EXPORT
, TYPE_F32
, sym
, NULL
, val
);
// Constructor: binds the converter to the SM4 program found in
// s->bin.source, zeroes the pointer caches and interpolation modes, resets
// the register/array counters, marks every per-phase output semantic as
// TGSI_SEMANTIC_COUNT and clears the phase bookkeeping.
// NOTE(review): most of the member initialiser list and original line 2214
// are absent from this extract.
2193 Converter::Converter(Program
*p
, struct nv50_ir_prog_info
*s
)
2198 sm4(*reinterpret_cast<const sm4_program
*>(s
->bin
.source
)),
2201 memset(srcPtr
, 0, sizeof(srcPtr
));
2202 memset(dstPtr
, 0, sizeof(dstPtr
));
2203 memset(vtxBase
, 0, sizeof(vtxBase
));
2205 memset(interpMode
, 0, sizeof(interpMode
));
2207 nrRegVals
= nrArrays
= arrayVol
= 0;
// phase counts down from 3 and is left at 0 when the loop ends
2209 for (phase
= 3; phase
> 0; --phase
)
2210 for (unsigned int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; ++i
)
2211 out
[phase
- 1][i
].sn
= TGSI_SEMANTIC_COUNT
;
2213 unrollPhase
= false;
2215 subPhaseCnt
[0] = subPhaseCnt
[1] = 0;
// Destructor: releases the two phaseInstCnt arrays.
// NOTE(review): original lines 2219-2223 and 2225 are absent from this
// extract, so any conditions guarding these deletes (and any other cleanup)
// are not visible here.
2218 Converter::~Converter()
2224 delete[] phaseInstCnt
[0];
2226 delete[] phaseInstCnt
[1];
// haveNextPhase: true when pos is a valid index into sm4.insns and the
// instruction there is SM4_OPCODE_HS_FORK_PHASE or SM4_OPCODE_HS_JOIN_PHASE.
// NOTE(review): original lines 2231-2232 are absent from this extract; pos
// may be adjusted before the test below - confirm against the full source.
2230 Converter::haveNextPhase(unsigned int pos
) const
2233 return (pos
< sm4
.insns
.size()) &&
2234 (sm4
.insns
[pos
]->opcode
== SM4_OPCODE_HS_FORK_PHASE
||
2235 sm4
.insns
[pos
]->opcode
== SM4_OPCODE_HS_JOIN_PHASE
);
// Body of the Converter member that drives the whole translation.
// NOTE(review): the function's signature line (around original line 2239)
// and several statements are absent from this extract.
// Visible steps: inspect all declarations, allocate the per-sub-phase
// instance count arrays, handle all declarations, assign slots, create the
// entry/leave blocks, pre-load fragment / tessellation inputs, then translate
// every instruction, rewinding to phaseStart to repeat a phase instance.
2243 for (unsigned int pos
= 0; pos
< sm4
.dcls
.size(); ++pos
)
2244 inspectDeclaration(*sm4
.dcls
[pos
]);
// one counter per sub-phase; index 0 is used for fork phases and index 1 for
// join phases (see the HS_FORK_PHASE / HS_JOIN_PHASE handling)
2246 phaseInstCnt
[0] = new unsigned int [subPhaseCnt
[0]];
2247 phaseInstCnt
[1] = new unsigned int [subPhaseCnt
[1]];
// -1 stored into unsigned int elements, i.e. all bits set
2248 for (int i
= 0; i
< subPhaseCnt
[0]; ++i
)
2249 phaseInstCnt
[0][i
] = -1;
2250 for (int i
= 0; i
< subPhaseCnt
[1]; ++i
)
2251 phaseInstCnt
[1][i
] = -1;
2252 // re-increased in handleDeclaration:
2253 subPhaseCnt
[0] = subPhaseCnt
[1] = 0;
2257 for (unsigned int pos
= 0; pos
< sm4
.dcls
.size(); ++pos
)
2258 handleDeclaration(*sm4
.dcls
[pos
]);
2260 info
.io
.genUserClip
= -1; // no UCPs permitted with SM4 shaders
// turn the stored value n into a mask with the low n bits set
2261 info
.io
.clipDistanceMask
= (1 << info
.io
.clipDistanceMask
) - 1;
2263 info
.assignSlots(&info
);
// NOTE(review): the statement executed for an empty program (original line
// 2266) is absent from this extract
2265 if (sm4
.dcls
.size() == 0 && sm4
.insns
.size() == 0)
2268 BasicBlock
*entry
= new BasicBlock(prog
->main
);
2269 BasicBlock
*leave
= new BasicBlock(prog
->main
);
2271 prog
->main
->setEntry(entry
);
2272 prog
->main
->setExit(leave
);
2274 setPosition(entry
, true);
2276 entryBBs
.push(entry
);
2277 leaveBBs
.push(leave
);
// fragment programs: fragCoord[3] = 1 / SV_POSITION component 3
2279 if (prog
->getType() == Program::TYPE_FRAGMENT
) {
2280 Symbol
*sv
= mkSysVal(SV_POSITION
, 3);
2281 fragCoord
[3] = mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), sv
);
2282 mkOp1(OP_RCP
, TYPE_F32
, fragCoord
[3], fragCoord
[3]);
// tessellation evaluation programs: read the tess coord components up front
// (3 for the triangle domain, otherwise 2)
2284 if (prog
->getType() == Program::TYPE_TESSELLATION_EVAL
) {
2285 const int n
= (info
.prop
.tp
.domain
== PIPE_PRIM_TRIANGLES
) ? 3 : 2;
2287 for (c
= 0; c
< n
; ++c
)
2289 mkOp1v(OP_RDSV
, TYPE_F32
, getSSA(), mkSysVal(SV_TESS_COORD
, c
));
2291 domainPt
[2] = loadImm(NULL
, 0.0f
);
// main translation loop
2298 for (unsigned int pos
= 0; pos
< sm4
.insns
.size(); ++pos
) {
2299 handleInstruction(pos
);
2300 if (likely(phase
== 0) || (phaseEnded
< 2))
2303 if (!unrollPhase
|| !phaseInstanceUsed
) {
2307 phaseInstanceUsed
= false;
// more instances left in this sub-phase: rewind so the loop's ++pos lands on
// phaseStart again
2308 if (phaseInstance
< (phaseInstCnt
[phase
- 1][subPhase
] - 1))
2309 pos
= phaseStart
- 1;
2318 } // anonymous namespace
// makeFromSM4: entry point that builds this Program from an SM4 shader by
// constructing a Converter for it.
// NOTE(review): the statement that runs the converter and produces the
// return value (original line 2326) is absent from this extract.
2323 Program::makeFromSM4(struct nv50_ir_prog_info
*info
)
2325 Converter
bld(this, info
);
2329 } // namespace nv50_ir