nouveau: oops
[mesa.git] / src / mesa / drivers / dri / nouveau / nouveau_shader_0.c
1 /*
2 * Copyright (C) 2006 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /*
29 * Authors:
30 * Ben Skeggs <darktama@iinet.net.au>
31 */
32
33 #include "glheader.h"
34 #include "macros.h"
35 #include "enums.h"
36
37 #include "program.h"
38 #include "programopt.h"
39 #include "program_instruction.h"
40
41 #include "nouveau_context.h"
42 #include "nouveau_shader.h"
43 #include "nouveau_msg.h"
44
45 static nvsFixedReg _tx_mesa_vp_dst_reg[VERT_RESULT_MAX] = {
46 NVS_FR_POSITION, NVS_FR_COL0, NVS_FR_COL1, NVS_FR_FOGCOORD,
47 NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3,
48 NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7,
49 NVS_FR_POINTSZ, NVS_FR_BFC0, NVS_FR_BFC1, NVS_FR_UNKNOWN /* EDGE */
50 };
51
52 static nvsFixedReg _tx_mesa_fp_dst_reg[FRAG_RESULT_MAX] = {
53 NVS_FR_FRAGDATA0 /* COLR */, NVS_FR_FRAGDATA0 /* COLH */,
54 NVS_FR_UNKNOWN /* DEPR */
55 };
56
57 static nvsFixedReg _tx_mesa_vp_src_reg[VERT_ATTRIB_MAX] = {
58 NVS_FR_POSITION, NVS_FR_WEIGHT, NVS_FR_NORMAL, NVS_FR_COL0, NVS_FR_COL1,
59 NVS_FR_FOGCOORD, NVS_FR_UNKNOWN /* COLOR_INDEX */, NVS_FR_UNKNOWN,
60 NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3,
61 NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7,
62 /* Generic attribs 0-15, aliased to the above */
63 NVS_FR_POSITION, NVS_FR_WEIGHT, NVS_FR_NORMAL, NVS_FR_COL0, NVS_FR_COL1,
64 NVS_FR_FOGCOORD, NVS_FR_UNKNOWN /* COLOR_INDEX */, NVS_FR_UNKNOWN,
65 NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3,
66 NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7
67 };
68
69 static nvsFixedReg _tx_mesa_fp_src_reg[FRAG_ATTRIB_MAX] = {
70 NVS_FR_POSITION, NVS_FR_COL0, NVS_FR_COL1, NVS_FR_FOGCOORD,
71 NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3,
72 NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7
73 };
74
75 static nvsSwzComp _tx_mesa_swizzle[4] = {
76 NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W
77 };
78
79 static nvsOpcode _tx_mesa_opcode[] = {
80 [OPCODE_ABS] = NVS_OP_ABS, [OPCODE_ADD] = NVS_OP_ADD,
81 [OPCODE_ARA] = NVS_OP_ARA, [OPCODE_ARL] = NVS_OP_ARL,
82 [OPCODE_ARL_NV] = NVS_OP_ARL, [OPCODE_ARR] = NVS_OP_ARR,
83 [OPCODE_CMP] = NVS_OP_CMP, [OPCODE_COS] = NVS_OP_COS,
84 [OPCODE_DDX] = NVS_OP_DDX, [OPCODE_DDY] = NVS_OP_DDY,
85 [OPCODE_DP3] = NVS_OP_DP3, [OPCODE_DP4] = NVS_OP_DP4,
86 [OPCODE_DPH] = NVS_OP_DPH, [OPCODE_DST] = NVS_OP_DST,
87 [OPCODE_EX2] = NVS_OP_EX2, [OPCODE_EXP] = NVS_OP_EXP,
88 [OPCODE_FLR] = NVS_OP_FLR, [OPCODE_FRC] = NVS_OP_FRC,
89 [OPCODE_KIL] = NVS_OP_EMUL, [OPCODE_KIL_NV] = NVS_OP_KIL,
90 [OPCODE_LG2] = NVS_OP_LG2, [OPCODE_LIT] = NVS_OP_LIT,
91 [OPCODE_LOG] = NVS_OP_LOG,
92 [OPCODE_LRP] = NVS_OP_LRP,
93 [OPCODE_MAD] = NVS_OP_MAD, [OPCODE_MAX] = NVS_OP_MAX,
94 [OPCODE_MIN] = NVS_OP_MIN, [OPCODE_MOV] = NVS_OP_MOV,
95 [OPCODE_MUL] = NVS_OP_MUL,
96 [OPCODE_PK2H] = NVS_OP_PK2H, [OPCODE_PK2US] = NVS_OP_PK2US,
97 [OPCODE_PK4B] = NVS_OP_PK4B, [OPCODE_PK4UB] = NVS_OP_PK4UB,
98 [OPCODE_POW] = NVS_OP_POW, [OPCODE_POPA] = NVS_OP_POPA,
99 [OPCODE_PUSHA] = NVS_OP_PUSHA,
100 [OPCODE_RCC] = NVS_OP_RCC, [OPCODE_RCP] = NVS_OP_RCP,
101 [OPCODE_RFL] = NVS_OP_RFL, [OPCODE_RSQ] = NVS_OP_RSQ,
102 [OPCODE_SCS] = NVS_OP_SCS, [OPCODE_SEQ] = NVS_OP_SEQ,
103 [OPCODE_SFL] = NVS_OP_SFL, [OPCODE_SGE] = NVS_OP_SGE,
104 [OPCODE_SGT] = NVS_OP_SGT, [OPCODE_SIN] = NVS_OP_SIN,
105 [OPCODE_SLE] = NVS_OP_SLE, [OPCODE_SLT] = NVS_OP_SLT,
106 [OPCODE_SNE] = NVS_OP_SNE, [OPCODE_SSG] = NVS_OP_SSG,
107 [OPCODE_STR] = NVS_OP_STR, [OPCODE_SUB] = NVS_OP_SUB,
108 [OPCODE_SWZ] = NVS_OP_MOV,
109 [OPCODE_TEX] = NVS_OP_TEX, [OPCODE_TXB] = NVS_OP_TXB,
110 [OPCODE_TXD] = NVS_OP_TXD,
111 [OPCODE_TXL] = NVS_OP_TXL, [OPCODE_TXP] = NVS_OP_TXP,
112 [OPCODE_TXP_NV] = NVS_OP_TXP,
113 [OPCODE_UP2H] = NVS_OP_UP2H, [OPCODE_UP2US] = NVS_OP_UP2US,
114 [OPCODE_UP4B] = NVS_OP_UP4B, [OPCODE_UP4UB] = NVS_OP_UP4UB,
115 [OPCODE_X2D] = NVS_OP_X2D,
116 [OPCODE_XPD] = NVS_OP_XPD
117 };
118
119 static nvsCond _tx_mesa_condmask[] = {
120 NVS_COND_TR, /* workaround mesa not filling a valid value */
121 NVS_COND_GT, NVS_COND_LT, NVS_COND_UN, NVS_COND_GE,
122 NVS_COND_LE, NVS_COND_NE, NVS_COND_NE, NVS_COND_TR, NVS_COND_FL
123 };
124
125 struct pass0_rec {
126 int nvs_ipos;
127 int next_temp;
128 int swzconst_done;
129 int swzconst_id;
130 nvsRegister const_half;
131 };
132
/* Short swizzle component names for use with nvsSwizzle(). */
#define X	NVS_SWZ_X
#define Y	NVS_SWZ_Y
#define Z	NVS_SWZ_Z
#define W	NVS_SWZ_W

/* Copy the condition-code test of the Mesa instruction onto `fragment`.
 * Relies on a `struct prog_instruction *inst` being in scope where the
 * macro is expanded.  cond_test is only ever switched on here, never off.
 */
#define FILL_CONDITION_FLAGS(fragment) do {				\
	(fragment)->cond = pass0_make_condmask(inst->DstReg.CondMask);	\
	if ((fragment)->cond != NVS_COND_TR)				\
		(fragment)->cond_test = 1;				\
	(fragment)->cond_reg = inst->CondDst;				\
	pass0_make_swizzle((fragment)->cond_swizzle,			\
			   inst->DstReg.CondSwizzle);			\
} while (0)

/* Emit one instruction, "unconditional" flavour: the condition fields keep
 * the defaults set by pass0_emit().  Relies on `nvs`, `parent`, `fpos` and
 * `nvsinst` being in scope where the macro is expanded; the new instruction
 * is left in `nvsinst`.
 */
#define ARITHu(op,dest,mask,sat,s0,s1,s2) do {				\
	nvsinst = pass0_emit(nvs, parent, fpos, (op),			\
			     (dest), (mask), (sat),			\
			     (s0), (s1), (s2));				\
} while (0)

/* Emit one instruction and then apply the Mesa instruction's condition
 * test to it.  Needs `inst` in scope in addition to what ARITHu needs.
 */
#define ARITH(op,dest,mask,sat,s0,s1,s2) do {				\
	ARITHu((op), (dest), (mask), (sat), (s0), (s1), (s2));		\
	FILL_CONDITION_FLAGS(nvsinst);					\
} while (0)
157
158 static void
159 pass0_append_fragment(nvsFragmentHeader *parent,
160 nvsFragmentHeader *fragment,
161 int pos)
162 {
163 nvsFragmentHeader **head, **tail;
164 assert(parent && fragment);
165
166 switch (parent->type) {
167 case NVS_BRANCH:
168 if (pos == 0) {
169 head = &((nvsBranch *)parent)->target_head;
170 tail = &((nvsBranch *)parent)->target_tail;
171 } else {
172 head = &((nvsBranch *)parent)->else_head;
173 tail = &((nvsBranch *)parent)->else_tail;
174 }
175 break;
176 case NVS_LOOP:
177 head = &((nvsLoop *)parent)->insn_head;
178 tail = &((nvsLoop *)parent)->insn_tail;
179 break;
180 case NVS_SUBROUTINE:
181 head = &((nvsSubroutine *)parent)->insn_head;
182 tail = &((nvsSubroutine *)parent)->insn_tail;
183 break;
184 default:
185 assert(0);
186 break;
187 }
188
189 fragment->parent = parent;
190 fragment->prev = *tail;
191 fragment->next = NULL;
192 if (!(*head))
193 *head = fragment;
194 else
195 (*tail)->next = fragment;
196 *tail = fragment;
197
198 }
199
200 static nvsSubroutine *
201 pass0_create_subroutine(nouveauShader *nvs, const char *label)
202 {
203 nvsSubroutine *sub;
204
205 sub = CALLOC_STRUCT(nvs_subroutine);
206 if (sub) {
207 sub->header.type = NVS_SUBROUTINE;
208 sub->label = strdup(label);
209 if (!nvs->program_tree)
210 nvs->program_tree = &sub->header;
211 else
212 pass0_append_fragment(nvs->program_tree,
213 &sub->header, 0);
214 }
215
216 return sub;
217 }
218
219 static void
220 pass0_make_reg(nouveauShader *nvs, nvsRegister *reg,
221 nvsRegFile file, unsigned int index)
222 {
223 struct pass0_rec *rec = nvs->pass_rec;
224
225 /* defaults */
226 *reg = nvr_unused;
227 /* -1 == quick-and-dirty temp alloc */
228 if (file == NVS_FILE_TEMP && index == -1) {
229 index = rec->next_temp++;
230 assert(index < NVS_MAX_TEMPS);
231 }
232 reg->file = file;
233 reg->index = index;
234 }
235
236 static void
237 pass0_make_swizzle(nvsSwzComp *swz, unsigned int mesa)
238 {
239 int i;
240
241 for (i=0;i<4;i++)
242 swz[i] = _tx_mesa_swizzle[GET_SWZ(mesa, i)];
243 }
244
245 static nvsOpcode
246 pass0_make_opcode(enum prog_opcode op)
247 {
248 if (op > MAX_OPCODE)
249 return NVS_OP_UNKNOWN;
250 return _tx_mesa_opcode[op];
251 }
252
253 static nvsCond
254 pass0_make_condmask(GLuint mesa)
255 {
256 if (mesa > COND_FL)
257 return NVS_COND_UNKNOWN;
258 return _tx_mesa_condmask[mesa];
259 }
260
261 static unsigned int
262 pass0_make_mask(GLuint mesa_mask)
263 {
264 unsigned int mask = 0;
265
266 if (mesa_mask & WRITEMASK_X) mask |= SMASK_X;
267 if (mesa_mask & WRITEMASK_Y) mask |= SMASK_Y;
268 if (mesa_mask & WRITEMASK_Z) mask |= SMASK_Z;
269 if (mesa_mask & WRITEMASK_W) mask |= SMASK_W;
270
271 return mask;
272 }
273
274 static nvsTexTarget
275 pass0_make_tex_target(GLuint mesa)
276 {
277 switch (mesa) {
278 case TEXTURE_1D_INDEX: return NVS_TEX_TARGET_1D;
279 case TEXTURE_2D_INDEX: return NVS_TEX_TARGET_2D;
280 case TEXTURE_3D_INDEX: return NVS_TEX_TARGET_3D;
281 case TEXTURE_CUBE_INDEX: return NVS_TEX_TARGET_CUBE;
282 case TEXTURE_RECT_INDEX: return NVS_TEX_TARGET_RECT;
283 default:
284 return NVS_TEX_TARGET_UNKNOWN;
285 }
286 }
287
288 static void
289 pass0_make_dst_reg(nvsPtr nvs, nvsRegister *reg,
290 struct prog_dst_register *dst)
291 {
292 struct gl_program *mesa = (struct gl_program*)&nvs->mesa.vp;
293 nvsFixedReg sfr;
294
295 switch (dst->File) {
296 case PROGRAM_OUTPUT:
297 if (mesa->Target == GL_VERTEX_PROGRAM_ARB) {
298 sfr = (dst->Index < VERT_RESULT_MAX) ?
299 _tx_mesa_vp_dst_reg[dst->Index] :
300 NVS_FR_UNKNOWN;
301 } else {
302 sfr = (dst->Index < FRAG_RESULT_MAX) ?
303 _tx_mesa_fp_dst_reg[dst->Index] :
304 NVS_FR_UNKNOWN;
305 }
306 pass0_make_reg(nvs, reg, NVS_FILE_RESULT, sfr);
307 break;
308 case PROGRAM_TEMPORARY:
309 pass0_make_reg(nvs, reg, NVS_FILE_TEMP, dst->Index);
310 break;
311 case PROGRAM_ADDRESS:
312 pass0_make_reg(nvs, reg, NVS_FILE_ADDRESS, dst->Index);
313 break;
314 default:
315 fprintf(stderr, "Unknown dest file %d\n", dst->File);
316 assert(0);
317 }
318 }
319
320 static void
321 pass0_make_src_reg(nvsPtr nvs, nvsRegister *reg, struct prog_src_register *src)
322 {
323 struct gl_program *mesa = (struct gl_program *)&nvs->mesa.vp.Base;
324 struct gl_program_parameter_list *p = mesa->Parameters;
325 int i;
326
327 *reg = nvr_unused;
328
329 switch (src->File) {
330 case PROGRAM_INPUT:
331 reg->file = NVS_FILE_ATTRIB;
332 if (mesa->Target == GL_VERTEX_PROGRAM_ARB) {
333 for (i=0; i<NVS_MAX_ATTRIBS; i++) {
334 if (nvs->vp_attrib_map[i] == src->Index) {
335 reg->index = i;
336 break;
337 }
338 }
339 if (i==NVS_MAX_ATTRIBS)
340 reg->index = NVS_FR_UNKNOWN;
341 } else {
342 reg->index = (src->Index < FRAG_ATTRIB_MAX) ?
343 _tx_mesa_fp_src_reg[src->Index] :
344 NVS_FR_UNKNOWN;
345 }
346 break;
347 /* All const types seem to get shoved into here, not really sure why */
348 case PROGRAM_STATE_VAR:
349 switch (p->Parameters[src->Index].Type) {
350 case PROGRAM_NAMED_PARAM:
351 case PROGRAM_CONSTANT:
352 nvs->params[src->Index].source_val = NULL;
353 COPY_4V(nvs->params[src->Index].val,
354 p->ParameterValues[src->Index]);
355 break;
356 case PROGRAM_STATE_VAR:
357 nvs->params[src->Index].source_val =
358 p->ParameterValues[src->Index];
359 break;
360 default:
361 fprintf(stderr, "Unknown parameter type %d\n",
362 p->Parameters[src->Index].Type);
363 assert(0);
364 break;
365 }
366
367 if (src->RelAddr) {
368 reg->indexed = 1;
369 reg->addr_reg = 0;
370 reg->addr_comp = NVS_SWZ_X;
371 } else
372 reg->indexed = 0;
373 reg->file = NVS_FILE_CONST;
374 reg->index = src->Index;
375 break;
376 case PROGRAM_TEMPORARY:
377 reg->file = NVS_FILE_TEMP;
378 reg->index = src->Index;
379 break;
380 default:
381 fprintf(stderr, "Unknown source type %d\n", src->File);
382 assert(0);
383 }
384
385 /* per-component negate handled elsewhere */
386 reg->negate = src->NegateBase != 0;
387 reg->abs = src->Abs;
388 pass0_make_swizzle(reg->swizzle, src->Swizzle);
389 }
390
391 static nvsInstruction *
392 pass0_emit(nouveauShader *nvs, nvsFragmentHeader *parent, int fpos,
393 nvsOpcode op, nvsRegister dst,
394 unsigned int mask, int saturate,
395 nvsRegister src0, nvsRegister src1, nvsRegister src2)
396 {
397 nvsInstruction *sif;
398
399 sif = CALLOC_STRUCT(nvs_instruction);
400 if (!sif)
401 return NULL;
402
403 /* Seems mesa doesn't explicitly 0 this.. */
404 if (nvs->mesa.vp.Base.Target == GL_VERTEX_PROGRAM_ARB)
405 saturate = 0;
406
407 sif->op = op;
408 sif->saturate = saturate;
409 sif->dest = dst;
410 sif->mask = mask;
411 sif->dest_scale = NVS_SCALE_1X;
412 sif->src[0] = src0;
413 sif->src[1] = src1;
414 sif->src[2] = src2;
415 sif->cond = COND_TR;
416 sif->cond_reg = 0;
417 sif->cond_test = 0;
418 sif->cond_update= 0;
419 pass0_make_swizzle(sif->cond_swizzle, SWIZZLE_NOOP);
420 pass0_append_fragment(parent, &sif->header, fpos);
421
422 return sif;
423 }
424
425 static void
426 pass0_fixup_swizzle(nvsPtr nvs, nvsFragmentHeader *parent, int fpos,
427 struct prog_src_register *src,
428 unsigned int sm1,
429 unsigned int sm2)
430 {
431 static const float sc[4] = { 1.0, 0.0, -1.0, 0.0 };
432 struct pass0_rec *rec = nvs->pass_rec;
433 int fixup_1, fixup_2;
434 nvsInstruction *nvsinst;
435 nvsRegister sr, dr = nvr_unused;
436 nvsRegister sm1const, sm2const;
437
438 if (!rec->swzconst_done) {
439 struct gl_program *prog = &nvs->mesa.vp.Base;
440 rec->swzconst_id = _mesa_add_unnamed_constant(prog->Parameters,
441 sc, 4);
442 rec->swzconst_done = 1;
443 COPY_4V(nvs->params[rec->swzconst_id].val, sc);
444 }
445
446 fixup_1 = (sm1 != MAKE_SWIZZLE4(0,0,0,0) &&
447 sm2 != MAKE_SWIZZLE4(2,2,2,2));
448 fixup_2 = (sm2 != MAKE_SWIZZLE4(2,2,2,2));
449
450 if (src->File != PROGRAM_TEMPORARY && src->File != PROGRAM_INPUT) {
451 /* We can't use more than one const in an instruction,
452 * so move the const into a temp, and swizzle from there.
453 *
454 * TODO: should just emit the swizzled const, instead of
455 * swizzling it in the shader.. would need to reswizzle
456 * any state params when they change however..
457 */
458 pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1);
459 pass0_make_src_reg(nvs, &sr, src);
460 ARITHu(NVS_OP_MOV, dr, SMASK_ALL, 0,
461 sr, nvr_unused, nvr_unused);
462 pass0_make_reg(nvs, &sr, NVS_FILE_TEMP, dr.index);
463 } else {
464 if (fixup_1)
465 src->NegateBase = 0;
466 pass0_make_src_reg(nvs, &sr, src);
467 pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1);
468 }
469
470 pass0_make_reg(nvs, &sm1const, NVS_FILE_CONST, rec->swzconst_id);
471 pass0_make_swizzle(sm1const.swizzle, sm1);
472 if (fixup_1 && fixup_2) {
473 /* Any combination with SWIZZLE_ONE */
474 pass0_make_reg(nvs, &sm2const,
475 NVS_FILE_CONST, rec->swzconst_id);
476 pass0_make_swizzle(sm2const.swizzle, sm2);
477 ARITHu(NVS_OP_MAD, dr, SMASK_ALL, 0, sr, sm1const, sm2const);
478 } else {
479 /* SWIZZLE_ZERO || arbitrary negate */
480 ARITHu(NVS_OP_MUL, dr, SMASK_ALL, 0, sr, sm1const, nvr_unused);
481 }
482
483 src->File = PROGRAM_TEMPORARY;
484 src->Index = dr.index;
485 src->Swizzle = SWIZZLE_NOOP;
486 }
487
488 #define SET_SWZ(fs, cp, c) fs = (fs & ~(0x7<<(cp*3))) | (c<<(cp*3))
489 static void
490 pass0_check_sources(nvsPtr nvs, nvsFragmentHeader *parent, int fpos,
491 struct prog_instruction *inst)
492 {
493 unsigned int insrc = -1, constsrc = -1;
494 int i;
495
496 for (i=0;i<_mesa_num_inst_src_regs(inst->Opcode);i++) {
497 struct prog_src_register *src = &inst->SrcReg[i];
498 unsigned int sm_1 = 0, sm_2 = 0;
499 nvsRegister sr, dr;
500 int do_mov = 0, c;
501
502 /* Build up swizzle masks as if we were going to use
503 * "MAD new, src, const1, const2" to support arbitrary negation
504 * and SWIZZLE_ZERO/SWIZZLE_ONE.
505 */
506 for (c=0;c<4;c++) {
507 if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ZERO) {
508 SET_SWZ(sm_1, c, SWIZZLE_Y); /* 0.0 */
509 SET_SWZ(sm_2, c, SWIZZLE_Y);
510 SET_SWZ(src->Swizzle, c, SWIZZLE_X);
511 } else if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ONE) {
512 SET_SWZ(sm_1, c, SWIZZLE_Y);
513 if (src->NegateBase & (1<<c))
514 SET_SWZ(sm_2, c, SWIZZLE_Z); /* -1.0 */
515 else
516 SET_SWZ(sm_2, c, SWIZZLE_X); /* 1.0 */
517 SET_SWZ(src->Swizzle, c, SWIZZLE_X);
518 } else {
519 if (src->NegateBase & (1<<c))
520 SET_SWZ(sm_1, c, SWIZZLE_Z); /* -[xyzw] */
521 else
522 SET_SWZ(sm_1, c, SWIZZLE_X); /*[xyzw]*/
523 SET_SWZ(sm_2, c, SWIZZLE_Y);
524 }
525 }
526
527 /* Unless we're multiplying by 1.0 or -1.0 on all components,
528 * and we're adding nothing to any component we have to
529 * emulate the swizzle.
530 */
531 if ((sm_1 != MAKE_SWIZZLE4(0,0,0,0) &&
532 sm_1 != MAKE_SWIZZLE4(2,2,2,2)) ||
533 sm_2 != MAKE_SWIZZLE4(1,1,1,1)) {
534 pass0_fixup_swizzle(nvs, parent, fpos, src, sm_1, sm_2);
535 /* The source is definitely in a temp now, so don't
536 * bother checking for multiple ATTRIB/CONST regs.
537 */
538 continue;
539 }
540
541 /* HW can't use more than one ATTRIB or PARAM in a single
542 * instruction */
543 switch (src->File) {
544 case PROGRAM_INPUT:
545 if (insrc != -1 && insrc != src->Index)
546 do_mov = 1;
547 else insrc = src->Index;
548 break;
549 case PROGRAM_STATE_VAR:
550 if (constsrc != -1 && constsrc != src->Index)
551 do_mov = 1;
552 else constsrc = src->Index;
553 break;
554 default:
555 break;
556 }
557
558 /* Emit any extra ATTRIB/CONST to a temp, and modify the Mesa
559 * instruction to point at the temp.
560 */
561 if (do_mov) {
562 pass0_make_src_reg(nvs, &sr, src);
563 pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1);
564 pass0_emit(nvs, parent, fpos, NVS_OP_MOV,
565 dr, SMASK_ALL, 0,
566 sr, nvr_unused, nvr_unused);
567
568 src->File = PROGRAM_TEMPORARY;
569 src->Index = dr.index;
570 src->Swizzle= SWIZZLE_NOOP;
571 }
572 }
573 }
574
575 static GLboolean
576 pass0_emulate_instruction(nouveauShader *nvs,
577 nvsFragmentHeader *parent, int fpos,
578 struct prog_instruction *inst)
579 {
580 nvsFunc *shader = nvs->func;
581 nvsRegister src[3], dest, temp;
582 nvsInstruction *nvsinst;
583 struct pass0_rec *rec = nvs->pass_rec;
584 unsigned int mask = pass0_make_mask(inst->DstReg.WriteMask);
585 int i, sat;
586
587 sat = (inst->SaturateMode == SATURATE_ZERO_ONE);
588
589 /* Build all the "real" regs for the instruction */
590 for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++)
591 pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]);
592 if (inst->Opcode != OPCODE_KIL)
593 pass0_make_dst_reg(nvs, &dest, &inst->DstReg);
594
595 switch (inst->Opcode) {
596 case OPCODE_ABS:
597 if (shader->caps & SCAP_SRC_ABS)
598 ARITH(NVS_OP_MOV, dest, mask, sat,
599 nvsAbs(src[0]), nvr_unused, nvr_unused);
600 else
601 ARITH(NVS_OP_MAX, dest, mask, sat,
602 src[0], nvsNegate(src[0]), nvr_unused);
603 break;
604 case OPCODE_CMP:
605 /*XXX: this will clobber CC0... */
606 ARITH (NVS_OP_MOV, dest, mask, sat,
607 src[2], nvr_unused, nvr_unused);
608 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
609 ARITHu(NVS_OP_MOV, temp, SMASK_ALL, 0,
610 src[0], nvr_unused, nvr_unused);
611 nvsinst->cond_update = 1;
612 nvsinst->cond_reg = 0;
613 ARITH (NVS_OP_MOV, dest, mask, sat,
614 src[1], nvr_unused, nvr_unused);
615 nvsinst->cond = COND_LT;
616 nvsinst->cond_reg = 0;
617 nvsinst->cond_test = 1;
618 break;
619 case OPCODE_DPH:
620 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
621 ARITHu(NVS_OP_DP3, temp, SMASK_X, 0,
622 src[0], src[1], nvr_unused);
623 ARITH (NVS_OP_ADD, dest, mask, sat,
624 nvsSwizzle(temp, X, X, X, X),
625 nvsSwizzle(src[1], W, W, W, W),
626 nvr_unused);
627 break;
628 case OPCODE_KIL:
629 /* This is only in ARB shaders, so we don't have to worry
630 * about clobbering a CC reg as they aren't supported anyway.
631 *XXX: might have to worry with GLSL however...
632 */
633 /* MOVC0 temp, src */
634 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
635 ARITHu(NVS_OP_MOV, temp, SMASK_ALL, 0,
636 src[0], nvr_unused, nvr_unused);
637 nvsinst->cond_update = 1;
638 nvsinst->cond_reg = 0;
639 /* KIL_NV (LT0.xyzw) temp */
640 ARITHu(NVS_OP_KIL, nvr_unused, 0, 0,
641 nvr_unused, nvr_unused, nvr_unused);
642 nvsinst->cond = COND_LT;
643 nvsinst->cond_reg = 0;
644 nvsinst->cond_test = 1;
645 pass0_make_swizzle(nvsinst->cond_swizzle, SWIZZLE_NOOP);
646 break;
647 case OPCODE_LRP:
648 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
649 ARITHu(NVS_OP_MAD, temp, mask, 0,
650 nvsNegate(src[0]), src[2], src[2]);
651 ARITH (NVS_OP_MAD, dest, mask, sat, src[0], src[1], temp);
652 break;
653 case OPCODE_POW:
654 if (shader->SupportsOpcode(shader, NVS_OP_LG2) &&
655 shader->SupportsOpcode(shader, NVS_OP_EX2)) {
656 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
657 /* LG2 temp.x, src0.c */
658 ARITHu(NVS_OP_LG2, temp, SMASK_X, 0,
659 nvsSwizzle(src[0], X, X, X, X),
660 nvr_unused, nvr_unused);
661 /* MUL temp.x, temp.x, src1.c */
662 ARITHu(NVS_OP_MUL, temp, SMASK_X, 0,
663 nvsSwizzle(temp, X, X, X, X),
664 nvsSwizzle(src[1], X, X, X, X),
665 nvr_unused);
666 /* EX2 dest, temp.x */
667 ARITH (NVS_OP_EX2, dest, mask, sat,
668 nvsSwizzle(temp, X, X, X, X),
669 nvr_unused, nvr_unused);
670 } else {
671 /* can we use EXP/LOG instead of EX2/LG2?? */
672 fprintf(stderr, "Implement POW for NV20 vtxprog!\n");
673 return GL_FALSE;
674 }
675 break;
676 case OPCODE_RSQ:
677 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
678 ARITHu(NVS_OP_LG2, temp, SMASK_X, 0,
679 nvsAbs(nvsSwizzle(src[0], X, X, X, X)),
680 nvr_unused, nvr_unused);
681 nvsinst->dest_scale = NVS_SCALE_INV_2X;
682 ARITH (NVS_OP_EX2, dest, mask, sat,
683 nvsNegate(nvsSwizzle(temp, X, X, X, X)),
684 nvr_unused, nvr_unused);
685 break;
686 case OPCODE_SCS:
687 if (mask & SMASK_X)
688 ARITH(NVS_OP_COS, dest, SMASK_X, sat,
689 nvsSwizzle(src[0], X, X, X, X),
690 nvr_unused, nvr_unused);
691 if (mask & SMASK_Y)
692 ARITH(NVS_OP_SIN, dest, SMASK_Y, sat,
693 nvsSwizzle(src[0], X, X, X, X),
694 nvr_unused, nvr_unused);
695 break;
696 case OPCODE_SUB:
697 ARITH(NVS_OP_ADD, dest, mask, sat,
698 src[0], nvsNegate(src[1]), nvr_unused);
699 break;
700 case OPCODE_XPD:
701 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
702 ARITHu(NVS_OP_MUL, temp, SMASK_ALL, 0,
703 nvsSwizzle(src[0], Z, X, Y, Y),
704 nvsSwizzle(src[1], Y, Z, X, X),
705 nvr_unused);
706 ARITH (NVS_OP_MAD, dest, (mask & ~SMASK_W), sat,
707 nvsSwizzle(src[0], Y, Z, X, X),
708 nvsSwizzle(src[1], Z, X, Y, Y),
709 nvsNegate(temp));
710 break;
711 default:
712 WARN_ONCE("hw doesn't support opcode \"%s\","
713 "and no emulation found\n",
714 _mesa_opcode_string(inst->Opcode));
715 return GL_FALSE;
716 }
717
718 return GL_TRUE;
719 }
720
721 static GLboolean
722 pass0_translate_arith(nouveauShader *nvs, struct gl_program *prog,
723 int ipos, int fpos,
724 nvsFragmentHeader *parent)
725 {
726 struct prog_instruction *inst = &prog->Instructions[ipos];
727 nvsFunc *shader = nvs->func;
728 nvsInstruction *nvsinst;
729 GLboolean ret;
730
731 /* Deal with multiple ATTRIB/PARAM in a single instruction */
732 pass0_check_sources(nvs, parent, fpos, inst);
733
734 /* Now it's safe to do the prog_instruction->nvsInstruction
735 * conversion
736 */
737 if (shader->SupportsOpcode(shader,
738 pass0_make_opcode(inst->Opcode))) {
739 nvsRegister src[3], dest;
740 int i;
741
742 for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++)
743 pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]);
744 pass0_make_dst_reg(nvs, &dest, &inst->DstReg);
745
746 ARITH(pass0_make_opcode(inst->Opcode), dest,
747 pass0_make_mask(inst->DstReg.WriteMask),
748 (inst->SaturateMode != SATURATE_OFF),
749 src[0], src[1], src[2]);
750 nvsinst->tex_unit = inst->TexSrcUnit;
751 nvsinst->tex_target = pass0_make_tex_target(inst->TexSrcTarget);
752
753 ret = GL_TRUE;
754 } else
755 ret = pass0_emulate_instruction(nvs, parent, fpos, inst);
756
757 return ret;
758 }
759
760 static GLboolean
761 pass0_translate_instructions(nouveauShader *nvs, int ipos, int fpos,
762 nvsFragmentHeader *parent)
763 {
764 struct gl_program *prog = (struct gl_program *)&nvs->mesa.vp;
765
766 while (1) {
767 struct prog_instruction *inst = &prog->Instructions[ipos];
768
769 switch (inst->Opcode) {
770 case OPCODE_END:
771 return GL_TRUE;
772 case OPCODE_BRA:
773 case OPCODE_CAL:
774 case OPCODE_RET:
775 //case OPCODE_LOOP:
776 //case OPCODE_ENDLOOP:
777 //case OPCODE_IF:
778 //case OPCODE_ELSE:
779 //case OPCODE_ENDIF:
780 WARN_ONCE("branch ops unimplemented\n");
781 return GL_FALSE;
782 break;
783 default:
784 if (!pass0_translate_arith(nvs, prog,
785 ipos, fpos, parent))
786 return GL_FALSE;
787 break;
788 }
789
790 ipos++;
791 }
792
793 return GL_TRUE;
794 }
795
796 static void
797 pass0_build_attrib_map(nouveauShader *nvs, struct gl_vertex_program *vp)
798 {
799 GLuint inputs_read = vp->Base.InputsRead;
800 GLuint input_alloc = ~0xFFFF;
801 int i;
802
803 for (i=0; i<NVS_MAX_ATTRIBS; i++)
804 nvs->vp_attrib_map[i] = -1;
805
806 while (inputs_read) {
807 int in = ffs(inputs_read) - 1;
808 int hw;
809 inputs_read &= ~(1<<in);
810
811 if (vp->IsNVProgram) {
812 /* NVvp: must alias */
813 if (in >= VERT_ATTRIB_GENERIC0)
814 hw = in - VERT_ATTRIB_GENERIC0;
815 else
816 hw = in;
817 } else {
818 /* ARBvp: may alias
819 * GL2.0: must not alias
820 */
821 if (in >= VERT_ATTRIB_GENERIC0)
822 hw = ffs(~input_alloc) - 1;
823 else
824 hw = in;
825 input_alloc |= (1<<hw);
826 }
827
828 nvs->vp_attrib_map[hw] = in;
829 }
830
831 if (NOUVEAU_DEBUG & DEBUG_SHADERS) {
832 printf("vtxprog attrib map:\n");
833 for (i=0; i<NVS_MAX_ATTRIBS; i++) {
834 printf(" hw:%d = attrib:%d\n",
835 i, nvs->vp_attrib_map[i]);
836 }
837 }
838 }
839
840 GLboolean
841 nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs)
842 {
843 nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
844 struct gl_program *prog = (struct gl_program*)nvs;
845 struct gl_vertex_program *vp = (struct gl_vertex_program *)prog;
846 struct gl_fragment_program *fp = (struct gl_fragment_program *)prog;
847 struct pass0_rec *rec;
848 int ret = GL_FALSE;
849
850 switch (prog->Target) {
851 case GL_VERTEX_PROGRAM_ARB:
852 nvs->func = &nmesa->VPfunc;
853
854 pass0_build_attrib_map(nvs, vp);
855
856 if (vp->IsPositionInvariant)
857 _mesa_insert_mvp_code(ctx, vp);
858 #if 0
859 if (IS_FIXEDFUNCTION_PROG && CLIP_PLANES_USED)
860 pass0_insert_ff_clip_planes();
861 #endif
862 break;
863 case GL_FRAGMENT_PROGRAM_ARB:
864 nvs->func = &nmesa->FPfunc;
865
866 if (fp->FogOption != GL_NONE)
867 _mesa_append_fog_code(ctx, fp);
868 break;
869 default:
870 fprintf(stderr, "Unknown program type %d", prog->Target);
871 return GL_FALSE;
872 }
873 nvs->func->card_priv = &nvs->card_priv;
874
875 rec = CALLOC_STRUCT(pass0_rec);
876 if (rec) {
877 rec->next_temp = prog->NumTemporaries;
878 nvs->pass_rec = rec;
879
880 nvs->program_tree = (nvsFragmentHeader*)
881 pass0_create_subroutine(nvs, "program body");
882 if (nvs->program_tree) {
883 ret = pass0_translate_instructions(nvs,
884 0, 0,
885 nvs->program_tree);
886 /*XXX: if (!ret) DESTROY TREE!!! */
887 }
888 FREE(rec);
889 }
890
891 return ret;
892 }
893