2 * Copyright (C) 2006 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 * Ben Skeggs <darktama@iinet.net.au>
38 #include "programopt.h"
39 #include "program_instruction.h"
41 #include "nouveau_context.h"
42 #include "nouveau_shader.h"
43 #include "nouveau_msg.h"
/*
 * Lookup table: Mesa vertex-program result index -> nouveau fixed result
 * register.  pass0_make_dst_reg() reads it with dst->Index when the program
 * target is GL_VERTEX_PROGRAM_ARB, after checking
 * dst->Index < VERT_RESULT_MAX.  The last listed entry (tagged EDGE) maps
 * to NVS_FR_UNKNOWN.
 */
45 static nvsFixedReg _tx_mesa_vp_dst_reg
[VERT_RESULT_MAX
] = {
46 NVS_FR_POSITION
, NVS_FR_COL0
, NVS_FR_COL1
, NVS_FR_FOGCOORD
,
47 NVS_FR_TEXCOORD0
, NVS_FR_TEXCOORD1
, NVS_FR_TEXCOORD2
, NVS_FR_TEXCOORD3
,
48 NVS_FR_TEXCOORD4
, NVS_FR_TEXCOORD5
, NVS_FR_TEXCOORD6
, NVS_FR_TEXCOORD7
,
49 NVS_FR_POINTSZ
, NVS_FR_BFC0
, NVS_FR_BFC1
, NVS_FR_UNKNOWN
/* EDGE */
/*
 * Lookup table: Mesa fragment-program result index -> nouveau fixed result
 * register.  Both colour results (COLR and COLH) map to NVS_FR_FRAGDATA0;
 * the depth result (DEPR) maps to NVS_FR_UNKNOWN.  pass0_make_dst_reg()
 * reads it with dst->Index after checking dst->Index < FRAG_RESULT_MAX.
 */
52 static nvsFixedReg _tx_mesa_fp_dst_reg
[FRAG_RESULT_MAX
] = {
53 NVS_FR_FRAGDATA0
/* COLR */, NVS_FR_FRAGDATA0
/* COLH */,
54 NVS_FR_UNKNOWN
/* DEPR */
/*
 * Lookup table: Mesa fragment-program input attribute index -> nouveau
 * fixed register.  pass0_make_src_reg() reads it with src->Index after
 * checking src->Index < FRAG_ATTRIB_MAX.
 */
57 static nvsFixedReg _tx_mesa_fp_src_reg
[FRAG_ATTRIB_MAX
] = {
58 NVS_FR_POSITION
, NVS_FR_COL0
, NVS_FR_COL1
, NVS_FR_FOGCOORD
,
59 NVS_FR_TEXCOORD0
, NVS_FR_TEXCOORD1
, NVS_FR_TEXCOORD2
, NVS_FR_TEXCOORD3
,
60 NVS_FR_TEXCOORD4
, NVS_FR_TEXCOORD5
, NVS_FR_TEXCOORD6
, NVS_FR_TEXCOORD7
/*
 * Lookup table: Mesa swizzle selector (as extracted by GET_SWZ) -> nouveau
 * swizzle component.  Only the four selectors X/Y/Z/W are covered.
 * pass0_check_sources() rewrites SWIZZLE_ZERO / SWIZZLE_ONE components to
 * SWIZZLE_X before translation.
 * NOTE(review): presumably those two selectors would otherwise index past
 * the end of this table -- confirm against the GET_SWZ value range.
 */
63 static nvsSwzComp _tx_mesa_swizzle
[4] = {
64 NVS_SWZ_X
, NVS_SWZ_Y
, NVS_SWZ_Z
, NVS_SWZ_W
/*
 * Lookup table: Mesa OPCODE_* -> nouveau NVS_OP_*, written with designated
 * initializers and read by pass0_make_opcode().
 *
 * Mappings that are not one-to-one by name:
 *   OPCODE_ARL_NV -> NVS_OP_ARL   (same as OPCODE_ARL)
 *   OPCODE_KIL    -> NVS_OP_EMUL  (OPCODE_KIL_NV -> NVS_OP_KIL)
 *   OPCODE_SWZ    -> NVS_OP_MOV
 *   OPCODE_TXP_NV -> NVS_OP_TXP   (same as OPCODE_TXP)
 *
 * Opcodes without an initializer here are zero-initialized by the language.
 * NOTE(review): which nvsOpcode value that zero corresponds to is not
 * visible here -- confirm.
 */
67 static nvsOpcode _tx_mesa_opcode
[] = {
68 [OPCODE_ABS
] = NVS_OP_ABS
, [OPCODE_ADD
] = NVS_OP_ADD
,
69 [OPCODE_ARA
] = NVS_OP_ARA
, [OPCODE_ARL
] = NVS_OP_ARL
,
70 [OPCODE_ARL_NV
] = NVS_OP_ARL
, [OPCODE_ARR
] = NVS_OP_ARR
,
71 [OPCODE_CMP
] = NVS_OP_CMP
, [OPCODE_COS
] = NVS_OP_COS
,
72 [OPCODE_DDX
] = NVS_OP_DDX
, [OPCODE_DDY
] = NVS_OP_DDY
,
73 [OPCODE_DP3
] = NVS_OP_DP3
, [OPCODE_DP4
] = NVS_OP_DP4
,
74 [OPCODE_DPH
] = NVS_OP_DPH
, [OPCODE_DST
] = NVS_OP_DST
,
75 [OPCODE_EX2
] = NVS_OP_EX2
, [OPCODE_EXP
] = NVS_OP_EXP
,
76 [OPCODE_FLR
] = NVS_OP_FLR
, [OPCODE_FRC
] = NVS_OP_FRC
,
77 [OPCODE_KIL
] = NVS_OP_EMUL
, [OPCODE_KIL_NV
] = NVS_OP_KIL
,
78 [OPCODE_LG2
] = NVS_OP_LG2
, [OPCODE_LIT
] = NVS_OP_LIT
,
79 [OPCODE_LOG
] = NVS_OP_LOG
,
80 [OPCODE_LRP
] = NVS_OP_LRP
,
81 [OPCODE_MAD
] = NVS_OP_MAD
, [OPCODE_MAX
] = NVS_OP_MAX
,
82 [OPCODE_MIN
] = NVS_OP_MIN
, [OPCODE_MOV
] = NVS_OP_MOV
,
83 [OPCODE_MUL
] = NVS_OP_MUL
,
84 [OPCODE_PK2H
] = NVS_OP_PK2H
, [OPCODE_PK2US
] = NVS_OP_PK2US
,
85 [OPCODE_PK4B
] = NVS_OP_PK4B
, [OPCODE_PK4UB
] = NVS_OP_PK4UB
,
86 [OPCODE_POW
] = NVS_OP_POW
, [OPCODE_POPA
] = NVS_OP_POPA
,
87 [OPCODE_PUSHA
] = NVS_OP_PUSHA
,
88 [OPCODE_RCC
] = NVS_OP_RCC
, [OPCODE_RCP
] = NVS_OP_RCP
,
89 [OPCODE_RFL
] = NVS_OP_RFL
, [OPCODE_RSQ
] = NVS_OP_RSQ
,
90 [OPCODE_SCS
] = NVS_OP_SCS
, [OPCODE_SEQ
] = NVS_OP_SEQ
,
91 [OPCODE_SFL
] = NVS_OP_SFL
, [OPCODE_SGE
] = NVS_OP_SGE
,
92 [OPCODE_SGT
] = NVS_OP_SGT
, [OPCODE_SIN
] = NVS_OP_SIN
,
93 [OPCODE_SLE
] = NVS_OP_SLE
, [OPCODE_SLT
] = NVS_OP_SLT
,
94 [OPCODE_SNE
] = NVS_OP_SNE
, [OPCODE_SSG
] = NVS_OP_SSG
,
95 [OPCODE_STR
] = NVS_OP_STR
, [OPCODE_SUB
] = NVS_OP_SUB
,
96 [OPCODE_SWZ
] = NVS_OP_MOV
,
97 [OPCODE_TEX
] = NVS_OP_TEX
, [OPCODE_TXB
] = NVS_OP_TXB
,
98 [OPCODE_TXD
] = NVS_OP_TXD
,
99 [OPCODE_TXL
] = NVS_OP_TXL
, [OPCODE_TXP
] = NVS_OP_TXP
,
100 [OPCODE_TXP_NV
] = NVS_OP_TXP
,
101 [OPCODE_UP2H
] = NVS_OP_UP2H
, [OPCODE_UP2US
] = NVS_OP_UP2US
,
102 [OPCODE_UP4B
] = NVS_OP_UP4B
, [OPCODE_UP4UB
] = NVS_OP_UP4UB
,
103 [OPCODE_X2D
] = NVS_OP_X2D
,
104 [OPCODE_XPD
] = NVS_OP_XPD
/*
 * Lookup table: Mesa condition-mask value -> nouveau condition code, read
 * by pass0_make_condmask().  Entry 0 is NVS_COND_TR ("always true") as a
 * workaround for Mesa leaving the field unset.
 * NOTE(review): NVS_COND_NE is listed twice in a row and no equality
 * condition appears anywhere in the table -- confirm the ordering against
 * Mesa's COND_* numbering.
 */
107 static nvsCond _tx_mesa_condmask
[] = {
108 NVS_COND_TR
, /* workaround mesa not filling a valid value */
109 NVS_COND_GT
, NVS_COND_LT
, NVS_COND_UN
, NVS_COND_GE
,
110 NVS_COND_LE
, NVS_COND_NE
, NVS_COND_NE
, NVS_COND_TR
, NVS_COND_FL
122 nvsRegister const_half
;
/*
 * Copy the condition-code state of the Mesa instruction `inst` (which must
 * be in scope at the expansion site) onto the nouveau instruction
 * `fragment`:
 *   - translates inst->DstReg.CondMask with pass0_make_condmask()
 *     (presumably storing the result in (fragment)->cond -- the assignment
 *     target is not visible here; confirm);
 *   - sets cond_test when the condition is anything other than NVS_COND_TR;
 *   - copies inst->CondDst into cond_reg;
 *   - translates inst->DstReg.CondSwizzle into cond_swizzle.
 */
130 #define FILL_CONDITION_FLAGS(fragment) do { \
132 pass0_make_condmask(inst->DstReg.CondMask); \
133 if ((fragment)->cond != NVS_COND_TR) \
134 (fragment)->cond_test = 1; \
135 (fragment)->cond_reg = inst->CondDst; \
136 pass0_make_swizzle((fragment)->cond_swizzle, inst->DstReg.CondSwizzle);\
/*
 * Emit one nouveau instruction through pass0_emit() and store it in the
 * local `nvsinst`, then apply the Mesa instruction's condition-code state
 * with FILL_CONDITION_FLAGS.  Relies on `nvs`, `parent`, `fpos`, `nvsinst`
 * and `inst` being in scope at the expansion site.
 */
139 #define ARITH(op,dest,mask,sat,s0,s1,s2) do { \
140 nvsinst = pass0_emit(nvs, parent, fpos, (op), \
141 (dest), (mask), (sat), (s0), (s1), (s2));\
142 FILL_CONDITION_FLAGS(nvsinst); \
/*
 * Variant of ARITH that only emits the instruction through pass0_emit()
 * into the local `nvsinst`; no FILL_CONDITION_FLAGS call is visible, so the
 * Mesa instruction's condition state is not applied.  Relies on `nvs`,
 * `parent`, `fpos` and `nvsinst` being in scope at the expansion site.
 */
145 #define ARITHu(op,dest,mask,sat,s0,s1,s2) do { \
146 nvsinst = pass0_emit(nvs, parent, fpos, (op), \
147 (dest), (mask), (sat), (s0), (s1), (s2));\
/*
 * Link `fragment` into one of the child lists owned by `parent`.
 *
 * Both pointers must be non-NULL (asserted).  The list is chosen from
 * parent->type: a branch contributes its target_head/target_tail or
 * else_head/else_tail pair, a loop or subroutine its insn_head/insn_tail
 * pair.  The fragment then records its parent, takes the current tail as
 * its `prev`, gets a NULL `next`, and is hooked in after the old tail.
 *
 * NOTE(review): the case labels, the remaining parameter(s), the choice
 * between the target and else lists, and the empty-list handling are not
 * visible here -- confirm before relying on this description.
 */
151 pass0_append_fragment(nvsFragmentHeader
*parent
,
152 nvsFragmentHeader
*fragment
,
155 nvsFragmentHeader
**head
, **tail
;
156 assert(parent
&& fragment
);
/* Select the head/tail pointers of the list that will receive the fragment. */
158 switch (parent
->type
) {
161 head
= &((nvsBranch
*)parent
)->target_head
;
162 tail
= &((nvsBranch
*)parent
)->target_tail
;
164 head
= &((nvsBranch
*)parent
)->else_head
;
165 tail
= &((nvsBranch
*)parent
)->else_tail
;
169 head
= &((nvsLoop
*)parent
)->insn_head
;
170 tail
= &((nvsLoop
*)parent
)->insn_tail
;
173 head
= &((nvsSubroutine
*)parent
)->insn_head
;
174 tail
= &((nvsSubroutine
*)parent
)->insn_tail
;
/* Initialise the new fragment's links relative to the current tail. */
181 fragment
->parent
= parent
;
182 fragment
->prev
= *tail
;
183 fragment
->next
= NULL
;
187 (*tail
)->next
= fragment
;
/*
 * Allocate a zeroed nvsSubroutine, tag its header as NVS_SUBROUTINE and
 * give it a private copy of `label` (strdup).  When the shader has no
 * program tree yet the new subroutine's header becomes the tree root; the
 * visible code also appends to nvs->program_tree (presumably in the
 * opposite case -- the `else` is not visible here; confirm).
 *
 * NOTE(review): the result of strdup() is stored without a check in the
 * visible lines.
 */
192 static nvsSubroutine
*
193 pass0_create_subroutine(nouveauShader
*nvs
, const char *label
)
197 sub
= CALLOC_STRUCT(nvs_subroutine
);
199 sub
->header
.type
= NVS_SUBROUTINE
;
200 sub
->label
= strdup(label
);
201 if (!nvs
->program_tree
)
202 nvs
->program_tree
= &sub
->header
;
204 pass0_append_fragment(nvs
->program_tree
,
/*
 * Fill in `reg` for register file `file` and index `index`.
 *
 * Passing index == -1 together with NVS_FILE_TEMP requests a fresh
 * temporary: the next free index is taken from rec->next_temp (which is
 * post-incremented) and asserted to be below NVS_MAX_TEMPS.  `index` is
 * unsigned, so the -1 comparison matches the all-ones value callers get
 * when they pass -1.
 *
 * NOTE(review): the code that stores file/index into *reg is not visible
 * here.
 */
212 pass0_make_reg(nouveauShader
*nvs
, nvsRegister
*reg
,
213 nvsRegFile file
, unsigned int index
)
215 struct pass0_rec
*rec
= nvs
->pass_rec
;
219 /* -1 == quick-and-dirty temp alloc */
220 if (file
== NVS_FILE_TEMP
&& index
== -1) {
221 index
= rec
->next_temp
++;
222 assert(index
< NVS_MAX_TEMPS
);
/*
 * Translate the packed Mesa swizzle `mesa` into the nvsSwzComp array `swz`:
 * for each component index i, the selector GET_SWZ(mesa, i) is looked up in
 * _tx_mesa_swizzle and written to swz[i].
 * NOTE(review): the loop header is not visible here; `swz` presumably has
 * room for four components -- confirm.
 */
229 pass0_make_swizzle(nvsSwzComp
*swz
, unsigned int mesa
)
234 swz
[i
] = _tx_mesa_swizzle
[GET_SWZ(mesa
, i
)];
/*
 * Translate a Mesa opcode to its nouveau equivalent through the
 * _tx_mesa_opcode table.  Returns NVS_OP_UNKNOWN on an early-out path whose
 * condition is not visible here (presumably a bounds check on `op` --
 * confirm).
 */
238 pass0_make_opcode(enum prog_opcode op
)
241 return NVS_OP_UNKNOWN
;
242 return _tx_mesa_opcode
[op
];
/*
 * Translate a Mesa condition-mask value to a nouveau condition code through
 * the _tx_mesa_condmask table.  Returns NVS_COND_UNKNOWN on an early-out
 * path whose condition is not visible here (presumably a bounds check on
 * `mesa` -- confirm).
 */
246 pass0_make_condmask(GLuint mesa
)
249 return NVS_COND_UNKNOWN
;
250 return _tx_mesa_condmask
[mesa
];
/*
 * Convert a Mesa write mask (WRITEMASK_X/Y/Z/W bits) into the equivalent
 * nouveau mask built from SMASK_X/Y/Z/W.  Each component is tested and
 * translated independently, starting from an empty mask.
 */
254 pass0_make_mask(GLuint mesa_mask
)
256 unsigned int mask
= 0;
258 if (mesa_mask
& WRITEMASK_X
) mask
|= SMASK_X
;
259 if (mesa_mask
& WRITEMASK_Y
) mask
|= SMASK_Y
;
260 if (mesa_mask
& WRITEMASK_Z
) mask
|= SMASK_Z
;
261 if (mesa_mask
& WRITEMASK_W
) mask
|= SMASK_W
;
267 pass0_opcode_is_tex(enum prog_opcode op
)
/*
 * Map a Mesa texture target index (TEXTURE_1D/2D/3D/CUBE/RECT_INDEX) to the
 * matching NVS_TEX_TARGET_* value.  Any other input yields
 * NVS_TEX_TARGET_UNKNOWN.
 */
284 pass0_make_tex_target(GLuint mesa
)
287 case TEXTURE_1D_INDEX
: return NVS_TEX_TARGET_1D
;
288 case TEXTURE_2D_INDEX
: return NVS_TEX_TARGET_2D
;
289 case TEXTURE_3D_INDEX
: return NVS_TEX_TARGET_3D
;
290 case TEXTURE_CUBE_INDEX
: return NVS_TEX_TARGET_CUBE
;
291 case TEXTURE_RECT_INDEX
: return NVS_TEX_TARGET_RECT
;
293 return NVS_TEX_TARGET_UNKNOWN
;
/*
 * Build the nouveau destination register `reg` for the Mesa destination
 * `dst`.
 *
 * Visible behaviour:
 *   - result registers: dst->Index is translated to a fixed register via
 *     _tx_mesa_vp_dst_reg (vertex programs) or _tx_mesa_fp_dst_reg
 *     (otherwise), each guarded by a bounds check on dst->Index, and the
 *     register is created in NVS_FILE_RESULT;
 *   - PROGRAM_TEMPORARY -> NVS_FILE_TEMP with the same index;
 *   - PROGRAM_ADDRESS   -> NVS_FILE_ADDRESS with the same index;
 *   - anything else is reported on stderr as an unknown destination file.
 *
 * NOTE(review): the out-of-range alternatives of the two conditional
 * expressions and the switch/case lines for the result file are not
 * visible here.  `mesa` is obtained by casting &nvs->mesa.vp to
 * struct gl_program * (pass0_make_src_reg uses &nvs->mesa.vp.Base instead);
 * presumably Base is the first member -- confirm.
 */
298 pass0_make_dst_reg(nvsPtr nvs
, nvsRegister
*reg
,
299 struct prog_dst_register
*dst
)
301 struct gl_program
*mesa
= (struct gl_program
*)&nvs
->mesa
.vp
;
306 if (mesa
->Target
== GL_VERTEX_PROGRAM_ARB
) {
307 sfr
= (dst
->Index
< VERT_RESULT_MAX
) ?
308 _tx_mesa_vp_dst_reg
[dst
->Index
] :
311 sfr
= (dst
->Index
< FRAG_RESULT_MAX
) ?
312 _tx_mesa_fp_dst_reg
[dst
->Index
] :
315 pass0_make_reg(nvs
, reg
, NVS_FILE_RESULT
, sfr
);
317 case PROGRAM_TEMPORARY
:
318 pass0_make_reg(nvs
, reg
, NVS_FILE_TEMP
, dst
->Index
);
320 case PROGRAM_ADDRESS
:
321 pass0_make_reg(nvs
, reg
, NVS_FILE_ADDRESS
, dst
->Index
);
324 fprintf(stderr
, "Unknown dest file %d\n", dst
->File
);
330 pass0_make_src_reg(nvsPtr nvs
, nvsRegister
*reg
, struct prog_src_register
*src
)
332 struct pass0_rec
*rec
= nvs
->pass_rec
;
333 struct gl_program
*mesa
= (struct gl_program
*)&nvs
->mesa
.vp
.Base
;
340 reg
->file
= NVS_FILE_ATTRIB
;
341 if (mesa
->Target
== GL_VERTEX_PROGRAM_ARB
) {
342 for (i
=0; i
<NVS_MAX_ATTRIBS
; i
++) {
343 if (nvs
->vp_attrib_map
[i
] == src
->Index
) {
348 if (i
==NVS_MAX_ATTRIBS
)
349 reg
->index
= NVS_FR_UNKNOWN
;
351 reg
->index
= (src
->Index
< FRAG_ATTRIB_MAX
) ?
352 _tx_mesa_fp_src_reg
[src
->Index
] :
356 case PROGRAM_STATE_VAR
:
357 case PROGRAM_NAMED_PARAM
:
358 case PROGRAM_CONSTANT
:
359 reg
->file
= NVS_FILE_CONST
;
360 reg
->index
= src
->Index
+ rec
->mesa_const_base
;
361 reg
->indexed
= src
->RelAddr
;
364 reg
->addr_comp
= NVS_SWZ_X
;
367 case PROGRAM_TEMPORARY
:
368 reg
->file
= NVS_FILE_TEMP
;
369 reg
->index
= src
->Index
;
372 fprintf(stderr
, "Unknown source type %d\n", src
->File
);
376 /* per-component negate handled elsewhere */
377 reg
->negate
= src
->NegateBase
!= 0;
379 pass0_make_swizzle(reg
->swizzle
, src
->Swizzle
);
382 static nvsInstruction
*
383 pass0_emit(nouveauShader
*nvs
, nvsFragmentHeader
*parent
, int fpos
,
384 nvsOpcode op
, nvsRegister dst
,
385 unsigned int mask
, int saturate
,
386 nvsRegister src0
, nvsRegister src1
, nvsRegister src2
)
390 sif
= CALLOC_STRUCT(nvs_instruction
);
394 /* Seems mesa doesn't explicitly 0 this.. */
395 if (nvs
->mesa
.vp
.Base
.Target
== GL_VERTEX_PROGRAM_ARB
)
399 sif
->saturate
= saturate
;
402 sif
->dest_scale
= NVS_SCALE_1X
;
410 pass0_make_swizzle(sif
->cond_swizzle
, SWIZZLE_NOOP
);
411 pass0_append_fragment(parent
, &sif
->header
, fpos
);
417 pass0_fixup_swizzle(nvsPtr nvs
, nvsFragmentHeader
*parent
, int fpos
,
418 struct prog_src_register
*src
,
422 static const float sc
[4] = { 1.0, 0.0, -1.0, 0.0 };
423 struct pass0_rec
*rec
= nvs
->pass_rec
;
424 int fixup_1
, fixup_2
;
425 nvsInstruction
*nvsinst
;
426 nvsRegister sr
, dr
= nvr_unused
;
427 nvsRegister sm1const
, sm2const
;
429 if (!rec
->swzconst_done
) {
430 struct gl_program
*prog
= &nvs
->mesa
.vp
.Base
;
431 rec
->swzconst_id
= _mesa_add_unnamed_constant(prog
->Parameters
,
433 rec
->swzconst_done
= 1;
434 COPY_4V(nvs
->params
[rec
->swzconst_id
].val
, sc
);
437 fixup_1
= (sm1
!= MAKE_SWIZZLE4(0,0,0,0) &&
438 sm2
!= MAKE_SWIZZLE4(2,2,2,2));
439 fixup_2
= (sm2
!= MAKE_SWIZZLE4(2,2,2,2));
441 if (src
->File
!= PROGRAM_TEMPORARY
&& src
->File
!= PROGRAM_INPUT
) {
442 /* We can't use more than one const in an instruction,
443 * so move the const into a temp, and swizzle from there.
445 * TODO: should just emit the swizzled const, instead of
446 * swizzling it in the shader.. would need to reswizzle
447 * any state params when they change however..
449 pass0_make_reg(nvs
, &dr
, NVS_FILE_TEMP
, -1);
450 pass0_make_src_reg(nvs
, &sr
, src
);
451 ARITHu(NVS_OP_MOV
, dr
, SMASK_ALL
, 0,
452 sr
, nvr_unused
, nvr_unused
);
453 pass0_make_reg(nvs
, &sr
, NVS_FILE_TEMP
, dr
.index
);
457 pass0_make_src_reg(nvs
, &sr
, src
);
458 pass0_make_reg(nvs
, &dr
, NVS_FILE_TEMP
, -1);
461 pass0_make_reg(nvs
, &sm1const
, NVS_FILE_CONST
, rec
->swzconst_id
);
462 pass0_make_swizzle(sm1const
.swizzle
, sm1
);
463 if (fixup_1
&& fixup_2
) {
464 /* Any combination with SWIZZLE_ONE */
465 pass0_make_reg(nvs
, &sm2const
,
466 NVS_FILE_CONST
, rec
->swzconst_id
);
467 pass0_make_swizzle(sm2const
.swizzle
, sm2
);
468 ARITHu(NVS_OP_MAD
, dr
, SMASK_ALL
, 0, sr
, sm1const
, sm2const
);
470 /* SWIZZLE_ZERO || arbitrary negate */
471 ARITHu(NVS_OP_MUL
, dr
, SMASK_ALL
, 0, sr
, sm1const
, nvr_unused
);
474 src
->File
= PROGRAM_TEMPORARY
;
475 src
->Index
= dr
.index
;
476 src
->Swizzle
= SWIZZLE_NOOP
;
/*
 * Replace the 3-bit swizzle selector of component `cp` inside the packed
 * swizzle word `fs` with selector `c`.
 *
 * `fs` must be a modifiable lvalue.  `fs` and `cp` are each evaluated
 * twice, so do not pass expressions with side effects.  Every argument is
 * parenthesized so that compound expressions (e.g. `i + 1` or `a | b`)
 * expand with the intended precedence.
 */
#define SET_SWZ(fs, cp, c) \
	((fs) = ((fs) & ~(0x7 << ((cp) * 3))) | ((c) << ((cp) * 3)))
481 pass0_check_sources(nvsPtr nvs
, nvsFragmentHeader
*parent
, int fpos
,
482 struct prog_instruction
*inst
)
484 unsigned int insrc
= -1, constsrc
= -1;
487 for (i
=0;i
<_mesa_num_inst_src_regs(inst
->Opcode
);i
++) {
488 struct prog_src_register
*src
= &inst
->SrcReg
[i
];
489 unsigned int sm_1
= 0, sm_2
= 0;
493 /* Build up swizzle masks as if we were going to use
494 * "MAD new, src, const1, const2" to support arbitrary negation
495 * and SWIZZLE_ZERO/SWIZZLE_ONE.
498 if (GET_SWZ(src
->Swizzle
, c
) == SWIZZLE_ZERO
) {
499 SET_SWZ(sm_1
, c
, SWIZZLE_Y
); /* 0.0 */
500 SET_SWZ(sm_2
, c
, SWIZZLE_Y
);
501 SET_SWZ(src
->Swizzle
, c
, SWIZZLE_X
);
502 } else if (GET_SWZ(src
->Swizzle
, c
) == SWIZZLE_ONE
) {
503 SET_SWZ(sm_1
, c
, SWIZZLE_Y
);
504 if (src
->NegateBase
& (1<<c
))
505 SET_SWZ(sm_2
, c
, SWIZZLE_Z
); /* -1.0 */
507 SET_SWZ(sm_2
, c
, SWIZZLE_X
); /* 1.0 */
508 SET_SWZ(src
->Swizzle
, c
, SWIZZLE_X
);
510 if (src
->NegateBase
& (1<<c
))
511 SET_SWZ(sm_1
, c
, SWIZZLE_Z
); /* -[xyzw] */
513 SET_SWZ(sm_1
, c
, SWIZZLE_X
); /*[xyzw]*/
514 SET_SWZ(sm_2
, c
, SWIZZLE_Y
);
518 /* Unless we're multiplying by 1.0 or -1.0 on all components,
519 * and we're adding nothing to any component we have to
520 * emulate the swizzle.
522 if ((sm_1
!= MAKE_SWIZZLE4(0,0,0,0) &&
523 sm_1
!= MAKE_SWIZZLE4(2,2,2,2)) ||
524 sm_2
!= MAKE_SWIZZLE4(1,1,1,1)) {
525 pass0_fixup_swizzle(nvs
, parent
, fpos
, src
, sm_1
, sm_2
);
526 /* The source is definitely in a temp now, so don't
527 * bother checking for multiple ATTRIB/CONST regs.
532 /* HW can't use more than one ATTRIB or PARAM in a single
536 if (insrc
!= -1 && insrc
!= src
->Index
)
538 else insrc
= src
->Index
;
540 case PROGRAM_STATE_VAR
:
541 if (constsrc
!= -1 && constsrc
!= src
->Index
)
543 else constsrc
= src
->Index
;
549 /* Emit any extra ATTRIB/CONST to a temp, and modify the Mesa
550 * instruction to point at the temp.
553 pass0_make_src_reg(nvs
, &sr
, src
);
554 pass0_make_reg(nvs
, &dr
, NVS_FILE_TEMP
, -1);
555 pass0_emit(nvs
, parent
, fpos
, NVS_OP_MOV
,
557 sr
, nvr_unused
, nvr_unused
);
559 src
->File
= PROGRAM_TEMPORARY
;
560 src
->Index
= dr
.index
;
561 src
->Swizzle
= SWIZZLE_NOOP
;
567 pass0_emulate_instruction(nouveauShader
*nvs
,
568 nvsFragmentHeader
*parent
, int fpos
,
569 struct prog_instruction
*inst
)
571 nvsFunc
*shader
= nvs
->func
;
572 nvsRegister src
[3], dest
, temp
;
573 nvsInstruction
*nvsinst
;
574 unsigned int mask
= pass0_make_mask(inst
->DstReg
.WriteMask
);
577 sat
= (inst
->SaturateMode
== SATURATE_ZERO_ONE
);
579 /* Build all the "real" regs for the instruction */
580 for (i
=0; i
<_mesa_num_inst_src_regs(inst
->Opcode
); i
++)
581 pass0_make_src_reg(nvs
, &src
[i
], &inst
->SrcReg
[i
]);
582 if (inst
->Opcode
!= OPCODE_KIL
)
583 pass0_make_dst_reg(nvs
, &dest
, &inst
->DstReg
);
585 switch (inst
->Opcode
) {
587 if (shader
->caps
& SCAP_SRC_ABS
)
588 ARITH(NVS_OP_MOV
, dest
, mask
, sat
,
589 nvsAbs(src
[0]), nvr_unused
, nvr_unused
);
591 ARITH(NVS_OP_MAX
, dest
, mask
, sat
,
592 src
[0], nvsNegate(src
[0]), nvr_unused
);
595 /*XXX: this will clobber CC0... */
596 ARITH (NVS_OP_MOV
, dest
, mask
, sat
,
597 src
[2], nvr_unused
, nvr_unused
);
598 pass0_make_reg(nvs
, &temp
, NVS_FILE_TEMP
, -1);
599 ARITHu(NVS_OP_MOV
, temp
, SMASK_ALL
, 0,
600 src
[0], nvr_unused
, nvr_unused
);
601 nvsinst
->cond_update
= 1;
602 nvsinst
->cond_reg
= 0;
603 ARITH (NVS_OP_MOV
, dest
, mask
, sat
,
604 src
[1], nvr_unused
, nvr_unused
);
605 nvsinst
->cond
= COND_LT
;
606 nvsinst
->cond_reg
= 0;
607 nvsinst
->cond_test
= 1;
610 pass0_make_reg(nvs
, &temp
, NVS_FILE_TEMP
, -1);
611 ARITHu(NVS_OP_DP3
, temp
, SMASK_X
, 0,
612 src
[0], src
[1], nvr_unused
);
613 ARITH (NVS_OP_ADD
, dest
, mask
, sat
,
614 nvsSwizzle(temp
, X
, X
, X
, X
),
615 nvsSwizzle(src
[1], W
, W
, W
, W
),
619 /* This is only in ARB shaders, so we don't have to worry
620 * about clobbering a CC reg as they aren't supported anyway.
621 *XXX: might have to worry with GLSL however...
623 /* MOVC0 temp, src */
624 pass0_make_reg(nvs
, &temp
, NVS_FILE_TEMP
, -1);
625 ARITHu(NVS_OP_MOV
, temp
, SMASK_ALL
, 0,
626 src
[0], nvr_unused
, nvr_unused
);
627 nvsinst
->cond_update
= 1;
628 nvsinst
->cond_reg
= 0;
629 /* KIL_NV (LT0.xyzw) temp */
630 ARITHu(NVS_OP_KIL
, nvr_unused
, 0, 0,
631 nvr_unused
, nvr_unused
, nvr_unused
);
632 nvsinst
->cond
= COND_LT
;
633 nvsinst
->cond_reg
= 0;
634 nvsinst
->cond_test
= 1;
635 pass0_make_swizzle(nvsinst
->cond_swizzle
, SWIZZLE_NOOP
);
638 pass0_make_reg(nvs
, &temp
, NVS_FILE_TEMP
, -1);
639 ARITHu(NVS_OP_MAD
, temp
, mask
, 0,
640 nvsNegate(src
[0]), src
[2], src
[2]);
641 ARITH (NVS_OP_MAD
, dest
, mask
, sat
, src
[0], src
[1], temp
);
644 if (shader
->SupportsOpcode(shader
, NVS_OP_LG2
) &&
645 shader
->SupportsOpcode(shader
, NVS_OP_EX2
)) {
646 pass0_make_reg(nvs
, &temp
, NVS_FILE_TEMP
, -1);
647 /* LG2 temp.x, src0.c */
648 ARITHu(NVS_OP_LG2
, temp
, SMASK_X
, 0,
649 nvsSwizzle(src
[0], X
, X
, X
, X
),
650 nvr_unused
, nvr_unused
);
651 /* MUL temp.x, temp.x, src1.c */
652 ARITHu(NVS_OP_MUL
, temp
, SMASK_X
, 0,
653 nvsSwizzle(temp
, X
, X
, X
, X
),
654 nvsSwizzle(src
[1], X
, X
, X
, X
),
656 /* EX2 dest, temp.x */
657 ARITH (NVS_OP_EX2
, dest
, mask
, sat
,
658 nvsSwizzle(temp
, X
, X
, X
, X
),
659 nvr_unused
, nvr_unused
);
661 /* can we use EXP/LOG instead of EX2/LG2?? */
662 fprintf(stderr
, "Implement POW for NV20 vtxprog!\n");
667 pass0_make_reg(nvs
, &temp
, NVS_FILE_TEMP
, -1);
668 ARITHu(NVS_OP_LG2
, temp
, SMASK_X
, 0,
669 nvsAbs(nvsSwizzle(src
[0], X
, X
, X
, X
)),
670 nvr_unused
, nvr_unused
);
671 nvsinst
->dest_scale
= NVS_SCALE_INV_2X
;
672 ARITH (NVS_OP_EX2
, dest
, mask
, sat
,
673 nvsNegate(nvsSwizzle(temp
, X
, X
, X
, X
)),
674 nvr_unused
, nvr_unused
);
678 ARITH(NVS_OP_COS
, dest
, SMASK_X
, sat
,
679 nvsSwizzle(src
[0], X
, X
, X
, X
),
680 nvr_unused
, nvr_unused
);
682 ARITH(NVS_OP_SIN
, dest
, SMASK_Y
, sat
,
683 nvsSwizzle(src
[0], X
, X
, X
, X
),
684 nvr_unused
, nvr_unused
);
687 ARITH(NVS_OP_ADD
, dest
, mask
, sat
,
688 src
[0], nvsNegate(src
[1]), nvr_unused
);
691 pass0_make_reg(nvs
, &temp
, NVS_FILE_TEMP
, -1);
692 ARITHu(NVS_OP_MUL
, temp
, SMASK_ALL
, 0,
693 nvsSwizzle(src
[0], Z
, X
, Y
, Y
),
694 nvsSwizzle(src
[1], Y
, Z
, X
, X
),
696 ARITH (NVS_OP_MAD
, dest
, (mask
& ~SMASK_W
), sat
,
697 nvsSwizzle(src
[0], Y
, Z
, X
, X
),
698 nvsSwizzle(src
[1], Z
, X
, Y
, Y
),
702 WARN_ONCE("hw doesn't support opcode \"%s\","
703 "and no emulation found\n",
704 _mesa_opcode_string(inst
->Opcode
));
712 pass0_translate_arith(nouveauShader
*nvs
, struct gl_program
*prog
,
714 nvsFragmentHeader
*parent
)
716 struct prog_instruction
*inst
= &prog
->Instructions
[ipos
];
717 nvsFunc
*shader
= nvs
->func
;
718 nvsInstruction
*nvsinst
;
721 /* Deal with multiple ATTRIB/PARAM in a single instruction */
722 pass0_check_sources(nvs
, parent
, fpos
, inst
);
724 /* Now it's safe to do the prog_instruction->nvsInstruction
727 if (shader
->SupportsOpcode(shader
,
728 pass0_make_opcode(inst
->Opcode
))) {
729 nvsRegister src
[3], dest
;
732 for (i
=0; i
<_mesa_num_inst_src_regs(inst
->Opcode
); i
++)
733 pass0_make_src_reg(nvs
, &src
[i
], &inst
->SrcReg
[i
]);
734 pass0_make_dst_reg(nvs
, &dest
, &inst
->DstReg
);
736 ARITH(pass0_make_opcode(inst
->Opcode
), dest
,
737 pass0_make_mask(inst
->DstReg
.WriteMask
),
738 (inst
->SaturateMode
!= SATURATE_OFF
),
739 src
[0], src
[1], src
[2]);
740 nvsinst
->tex_unit
= inst
->TexSrcUnit
;
741 if (pass0_opcode_is_tex(inst
->Opcode
))
742 nvsinst
->tex_target
=
743 pass0_make_tex_target(inst
->TexSrcTarget
);
745 nvsinst
->tex_target
= NVS_TEX_TARGET_UNKNOWN
;
749 ret
= pass0_emulate_instruction(nvs
, parent
, fpos
, inst
);
755 pass0_translate_instructions(nouveauShader
*nvs
, int ipos
, int fpos
,
756 nvsFragmentHeader
*parent
)
758 struct gl_program
*prog
= (struct gl_program
*)&nvs
->mesa
.vp
;
761 struct prog_instruction
*inst
= &prog
->Instructions
[ipos
];
763 switch (inst
->Opcode
) {
770 //case OPCODE_ENDLOOP:
774 WARN_ONCE("branch ops unimplemented\n");
778 if (!pass0_translate_arith(nvs
, prog
,
/*
 * Build nvs->vp_attrib_map, which records for each hardware attribute slot
 * the Mesa vertex attribute assigned to it (-1 when unused).
 *
 * The attributes to place come from vp->Base.InputsRead and are taken one
 * at a time, lowest set bit first.  `input_alloc` tracks slots that are
 * already taken; it starts as ~0xFFFF, i.e. only the low 16 bits are free.
 *
 *   - NV vertex programs: a generic attribute is placed in slot
 *     (in - VERT_ATTRIB_GENERIC0).
 *   - ARB/GL2 programs: a generic attribute gets the lowest slot still free
 *     in `input_alloc`.
 *
 * NOTE(review): the handling of non-generic attributes (the `else` arms)
 * is not visible here.  If no slot is free, ffs(~input_alloc) returns 0 and
 * `hw` would become -1 -- confirm this cannot happen.
 *
 * With DEBUG_SHADERS enabled the finished map is printed.
 */
791 pass0_build_attrib_map(nouveauShader
*nvs
, struct gl_vertex_program
*vp
)
793 GLuint inputs_read
= vp
->Base
.InputsRead
;
794 GLuint input_alloc
= ~0xFFFF;
/* Start with every hardware slot unassigned. */
797 for (i
=0; i
<NVS_MAX_ATTRIBS
; i
++)
798 nvs
->vp_attrib_map
[i
] = -1;
800 while (inputs_read
) {
801 int in
= ffs(inputs_read
) - 1;
803 inputs_read
&= ~(1<<in
);
805 if (vp
->IsNVProgram
) {
806 /* NVvp: must alias */
807 if (in
>= VERT_ATTRIB_GENERIC0
)
808 hw
= in
- VERT_ATTRIB_GENERIC0
;
812 /* ARBvp: may alias (but we won't)
813 * GL2.0: must not alias
815 if (in
>= VERT_ATTRIB_GENERIC0
)
816 hw
= ffs(~input_alloc
) - 1;
819 input_alloc
|= (1<<hw
);
822 nvs
->vp_attrib_map
[hw
] = in
;
825 if (NOUVEAU_DEBUG
& DEBUG_SHADERS
) {
826 printf("vtxprog attrib map:\n");
827 for (i
=0; i
<NVS_MAX_ATTRIBS
; i
++) {
828 printf(" hw:%d = attrib:%d\n",
829 i
, nvs
->vp_attrib_map
[i
]);
835 pass0_vp_insert_ff_clip_planes(GLcontext
*ctx
, nouveauShader
*nvs
)
837 struct gl_program
*prog
= &nvs
->mesa
.vp
.Base
;
838 nvsFragmentHeader
*parent
= nvs
->program_tree
;
839 nvsInstruction
*nvsinst
;
841 nvsRegister opos
, epos
, eqn
, mv
[4];
842 GLint tokens
[6] = { STATE_MATRIX
, STATE_MODELVIEW
, 0, 0, 0, 0 };
846 /* modelview transform */
847 pass0_make_reg(nvs
, &opos
, NVS_FILE_ATTRIB
, NVS_FR_POSITION
);
848 pass0_make_reg(nvs
, &epos
, NVS_FILE_TEMP
, -1);
849 for (i
=0; i
<4; i
++) {
850 tokens
[3] = tokens
[4] = i
;
851 id
= _mesa_add_state_reference(prog
->Parameters
, tokens
);
852 pass0_make_reg(nvs
, &mv
[i
], NVS_FILE_CONST
, id
);
854 ARITHu(NVS_OP_DP4
, epos
, SMASK_X
, 0, opos
, mv
[0], nvr_unused
);
855 ARITHu(NVS_OP_DP4
, epos
, SMASK_Y
, 0, opos
, mv
[1], nvr_unused
);
856 ARITHu(NVS_OP_DP4
, epos
, SMASK_Z
, 0, opos
, mv
[2], nvr_unused
);
857 ARITHu(NVS_OP_DP4
, epos
, SMASK_W
, 0, opos
, mv
[3], nvr_unused
);
859 /* Emit code to emulate fixed-function glClipPlane */
860 for (i
=0; i
<6; i
++) {
861 GLuint clipmask
= SMASK_X
;
864 if (!(ctx
->Transform
.ClipPlanesEnabled
& (1<<i
)))
867 /* Point a const at a user clipping plane */
868 tokens
[0] = STATE_CLIPPLANE
;
870 id
= _mesa_add_state_reference(prog
->Parameters
, tokens
);
871 pass0_make_reg(nvs
, &eqn
, NVS_FILE_CONST
, id
);
872 pass0_make_reg(nvs
, &clip
, NVS_FILE_RESULT
, NVS_FR_CLIP0
+ i
);
874 /*XXX: something else needs to take care of modifying the
875 * instructions to write to the correct hw clip register.
878 case 0: case 3: clipmask
= SMASK_Y
; break;
879 case 1: case 4: clipmask
= SMASK_Z
; break;
880 case 2: case 5: clipmask
= SMASK_W
; break;
884 ARITHu(NVS_OP_DP4
, clip
, clipmask
, 0, epos
, eqn
, nvr_unused
);
/*
 * Clear nvs->params and compute rec->mesa_const_base, the offset added to
 * every Mesa constant index (see pass0_make_src_reg) so that relative
 * addressing never produces a negative hardware constant id.
 *
 * The instruction list is walked up to OPCODE_END.  For every source
 * operand in a constant-like file (STATE_VAR, CONSTANT, NAMED_PARAM) that
 * uses relative addressing with a negative index, the magnitude of that
 * index is compared with the current base and the larger value is kept.
 *
 * NOTE(review): the existing XXX comment warns that the memset also wipes
 * a malloc'd hw_index pointer inside params.
 */
889 pass0_rebase_mesa_consts(nouveauShader
*nvs
)
891 struct pass0_rec
*rec
= nvs
->pass_rec
;
892 struct gl_program
*prog
= &nvs
->mesa
.vp
.Base
;
893 struct prog_instruction
*inst
= prog
->Instructions
;
896 /*XXX: not a good idea, params->hw_index is malloc'd */
897 memset(nvs
->params
, 0x00, sizeof(nvs
->params
));
899 /* When doing relative addressing on constants, the hardware needs us
900 * to fill the "const id" field with a positive value. Determine the
901 * most negative index that is used so that all accesses to a
902 * mesa-provided constant can be rebased to a positive index.
904 while (inst
->Opcode
!= OPCODE_END
) {
905 for (i
=0; i
<_mesa_num_inst_src_regs(inst
->Opcode
); i
++) {
906 struct prog_src_register
*src
= &inst
->SrcReg
[i
];
909 case PROGRAM_STATE_VAR
:
910 case PROGRAM_CONSTANT
:
911 case PROGRAM_NAMED_PARAM
:
912 if (src
->RelAddr
&& src
->Index
< 0) {
913 int base
= src
->Index
* -1;
914 if (rec
->mesa_const_base
< base
)
915 rec
->mesa_const_base
= base
;
/*
 * Populate nvs->params from the Mesa program's parameter list.
 *
 * Parameter i is stored in slot (rec->mesa_const_base + i).  The slot after
 * the last Mesa parameter is recorded in rec->mesa_const_last and
 * nvs->param_high.  For each parameter:
 *   - NAMED_PARAM / STATE_VAR: marked in use, source_val points at Mesa's
 *     ParameterValues[i], and the current value is copied into .val;
 *   - CONSTANT: marked in use, source_val is NULL, value copied into .val;
 *   - any other type is reported through nvsProgramError().
 *
 * NOTE(review): the range guard is `hw > NVS_MAX_CONSTS`, which lets
 * hw == NVS_MAX_CONSTS through; if nvs->params holds NVS_MAX_CONSTS
 * entries that indexes one past the end -- confirm the array size.
 */
928 pass0_resolve_mesa_consts(nouveauShader
*nvs
)
930 struct pass0_rec
*rec
= nvs
->pass_rec
;
931 struct gl_program
*prog
= &nvs
->mesa
.vp
.Base
;
932 struct gl_program_parameter_list
*plist
= prog
->Parameters
;
935 /* Init all const tracking/alloc info from the parameter list, rather
936 * than doing it as we translate the program. Otherwise:
937 * 1) we can't get at the correct constant info when relative
938 * addressing is being used due to src->Index not pointing
939 * at the exact const;
940 * 2) as we add extra consts to the program, mesa will call realloc()
941 * and we get invalid pointers to the const data.
943 rec
->mesa_const_last
= plist
->NumParameters
+ rec
->mesa_const_base
;
944 nvs
->param_high
= rec
->mesa_const_last
;
945 for (i
=0; i
<plist
->NumParameters
; i
++) {
946 int hw
= rec
->mesa_const_base
+ i
;
948 if (hw
> NVS_MAX_CONSTS
) {
949 nvsProgramError(nvs
, "hw = %d > NVS_MAX_CONSTS!\n", hw
);
953 switch (plist
->Parameters
[i
].Type
) {
954 case PROGRAM_NAMED_PARAM
:
955 case PROGRAM_STATE_VAR
:
956 nvs
->params
[hw
].in_use
= GL_TRUE
;
957 nvs
->params
[hw
].source_val
= plist
->ParameterValues
[i
];
958 COPY_4V(nvs
->params
[hw
].val
, plist
->ParameterValues
[i
]);
960 case PROGRAM_CONSTANT
:
961 nvs
->params
[hw
].in_use
= GL_TRUE
;
962 nvs
->params
[hw
].source_val
= NULL
;
963 COPY_4V(nvs
->params
[hw
].val
, plist
->ParameterValues
[i
]);
966 nvsProgramError(nvs
, "hit bad type=%d on param %d\n",
967 plist
->Parameters
[i
].Type
, i
);
976 nouveau_shader_pass0(GLcontext
*ctx
, nouveauShader
*nvs
)
978 nouveauContextPtr nmesa
= NOUVEAU_CONTEXT(ctx
);
979 struct gl_program
*prog
= (struct gl_program
*)nvs
;
980 struct gl_vertex_program
*vp
= (struct gl_vertex_program
*)prog
;
981 struct gl_fragment_program
*fp
= (struct gl_fragment_program
*)prog
;
982 struct pass0_rec
*rec
;
985 NVSDBG("start: nvs=%p\n", nvs
);
987 /* Previously detected an error, and haven't received new program
988 * string, so fail immediately.
991 NVSDBG("failed previous compile attempt, not retrying\n");
995 rec
= CALLOC_STRUCT(pass0_rec
);
999 rec
->next_temp
= prog
->NumTemporaries
;
1000 nvs
->pass_rec
= rec
;
1002 nvs
->program_tree
= (nvsFragmentHeader
*)
1003 pass0_create_subroutine(nvs
, "program body");
1004 if (!nvs
->program_tree
) {
1009 switch (prog
->Target
) {
1010 case GL_VERTEX_PROGRAM_ARB
:
1011 nvs
->func
= &nmesa
->VPfunc
;
1013 if (vp
->IsPositionInvariant
)
1014 _mesa_insert_mvp_code(ctx
, vp
);
1015 pass0_rebase_mesa_consts(nvs
);
1017 if (!prog
->String
&& ctx
->Transform
.ClipPlanesEnabled
)
1018 pass0_vp_insert_ff_clip_planes(ctx
, nvs
);
1020 pass0_build_attrib_map(nvs
, vp
);
1022 case GL_FRAGMENT_PROGRAM_ARB
:
1023 nvs
->func
= &nmesa
->FPfunc
;
1025 if (fp
->FogOption
!= GL_NONE
)
1026 _mesa_append_fog_code(ctx
, fp
);
1027 pass0_rebase_mesa_consts(nvs
);
1030 fprintf(stderr
, "Unknown program type %d", prog
->Target
);
1032 /* DESTROY TREE!! */
1035 nvs
->func
->card_priv
= &nvs
->card_priv
;
1037 ret
= pass0_translate_instructions(nvs
, 0, 0, nvs
->program_tree
);
1039 ret
= pass0_resolve_mesa_consts(nvs
);
1041 /*XXX: if (!ret) DESTROY TREE!!! */