replace __inline and __inline__ with INLINE macro
[mesa.git] / src / mesa / drivers / dri / nouveau / nv20_vertprog.c
1 #include "nouveau_context.h"
2 #include "nouveau_object.h"
3 #include "nouveau_fifo.h"
4 #include "nouveau_reg.h"
5
6 #include "nouveau_shader.h"
7 #include "nv20_shader.h"
8
9 unsigned int NVVP_TX_VOP_COUNT = 16;
10 unsigned int NVVP_TX_NVS_OP_COUNT = 16;
11 struct _op_xlat NVVP_TX_VOP[32];
12 struct _op_xlat NVVP_TX_SOP[32];
13
14 nvsSwzComp NV20VP_TX_SWIZZLE[4] = { NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W };
15
16 /*****************************************************************************
17 * Support routines
18 */
19 static void
20 NV20VPUploadToHW(GLcontext *ctx, nouveauShader *nvs)
21 {
22 nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
23 int i;
24
25 /* XXX: missing a way to say what insn we're uploading from, and possible
26 * the program start position (if NV20 has one) */
27 for (i=0; i<nvs->program_size; i+=4) {
28 BEGIN_RING_SIZE(NvSub3D, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0, 4);
29 OUT_RING(nvs->program[i + 0]);
30 OUT_RING(nvs->program[i + 1]);
31 OUT_RING(nvs->program[i + 2]);
32 OUT_RING(nvs->program[i + 3]);
33 }
34 }
35
36 static void
37 NV20VPUpdateConst(GLcontext *ctx, nouveauShader *nvs, int id)
38 {
39 nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
40
41 /* Worth checking if the value *actually* changed? Mesa doesn't tell us this
42 * as far as I know..
43 */
44 BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_ID, 1);
45 OUT_RING (id);
46 BEGIN_RING_SIZE(NvSub3D, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 4);
47 OUT_RINGf(nvs->params[id].source_val[0]);
48 OUT_RINGf(nvs->params[id].source_val[1]);
49 OUT_RINGf(nvs->params[id].source_val[2]);
50 OUT_RINGf(nvs->params[id].source_val[3]);
51 }
52
53 /*****************************************************************************
54 * Assembly routines
55 */
56
57 /*****************************************************************************
58 * Disassembly routines
59 */
60 void
61 NV20VPTXSwizzle(int hwswz, nvsSwzComp *swz)
62 {
63 swz[NVS_SWZ_X] = NV20VP_TX_SWIZZLE[(hwswz & 0xC0) >> 6];
64 swz[NVS_SWZ_Y] = NV20VP_TX_SWIZZLE[(hwswz & 0x30) >> 4];
65 swz[NVS_SWZ_Z] = NV20VP_TX_SWIZZLE[(hwswz & 0x0C) >> 2];
66 swz[NVS_SWZ_W] = NV20VP_TX_SWIZZLE[(hwswz & 0x03) >> 0];
67 }
68
69 static int
70 NV20VPHasMergedInst(nvsFunc * shader)
71 {
72 if (shader->GetOpcodeHW(shader, 0) != NV20_VP_INST_OPCODE_NOP &&
73 shader->GetOpcodeHW(shader, 1) != NV20_VP_INST_OPCODE_NOP)
74 printf
75 ("\n\n*****both opcode fields have values - PLEASE REPORT*****\n");
76 return 0;
77 }
78
79 static int
80 NV20VPIsLastInst(nvsFunc * shader)
81 {
82 return ((shader->inst[3] & (1 << 0)) ? 1 : 0);
83 }
84
85 static int
86 NV20VPGetOffsetNext(nvsFunc * shader)
87 {
88 return 4;
89 }
90
91 static struct _op_xlat *
92 NV20VPGetOPTXRec(nvsFunc * shader, int merged)
93 {
94 struct _op_xlat *opr;
95 int op;
96
97 if (shader->GetOpcodeSlot(shader, merged)) {
98 opr = NVVP_TX_SOP;
99 op = shader->GetOpcodeHW(shader, 1);
100 if (op >= NVVP_TX_NVS_OP_COUNT)
101 return NULL;
102 }
103 else {
104 opr = NVVP_TX_VOP;
105 op = shader->GetOpcodeHW(shader, 0);
106 if (op >= NVVP_TX_VOP_COUNT)
107 return NULL;
108 }
109
110 if (opr[op].SOP == NVS_OP_UNKNOWN)
111 return NULL;
112 return &opr[op];
113 }
114
115 static struct _op_xlat *
116 NV20VPGetOPTXFromSOP(nvsOpcode sop, int *id)
117 {
118 int i;
119
120 for (i=0;i<NVVP_TX_VOP_COUNT;i++) {
121 if (NVVP_TX_VOP[i].SOP == sop) {
122 if (id) *id = 0;
123 return &NVVP_TX_VOP[i];
124 }
125 }
126
127 for (i=0;i<NVVP_TX_NVS_OP_COUNT;i++) {
128 if (NVVP_TX_SOP[i].SOP == sop) {
129 if (id) *id = 1;
130 return &NVVP_TX_SOP[i];
131 }
132 }
133
134 return NULL;
135 }
136
137 static int
138 NV20VPGetOpcodeSlot(nvsFunc * shader, int merged)
139 {
140 if (shader->HasMergedInst(shader))
141 return merged;
142 if (shader->GetOpcodeHW(shader, 0) == NV20_VP_INST_OPCODE_NOP)
143 return 1;
144 return 0;
145 }
146
147 static nvsOpcode
148 NV20VPGetOpcode(nvsFunc * shader, int merged)
149 {
150 struct _op_xlat *opr;
151
152 opr = shader->GetOPTXRec(shader, merged);
153 if (!opr)
154 return NVS_OP_UNKNOWN;
155
156 return opr->SOP;
157 }
158
159 static nvsOpcode
160 NV20VPGetOpcodeHW(nvsFunc * shader, int slot)
161 {
162 if (slot)
163 return (shader->inst[1] & NV20_VP_INST_SCA_OPCODE_MASK)
164 >> NV20_VP_INST_SCA_OPCODE_SHIFT;
165 return (shader->inst[1] & NV20_VP_INST_VEC_OPCODE_MASK)
166 >> NV20_VP_INST_VEC_OPCODE_SHIFT;
167 }
168
169 static nvsRegFile
170 NV20VPGetDestFile(nvsFunc * shader, int merged)
171 {
172 switch (shader->GetOpcode(shader, merged)) {
173 case NVS_OP_ARL:
174 return NVS_FILE_ADDRESS;
175 default:
176 /*FIXME: This probably isn't correct.. */
177 if ((shader->inst[3] & NV20_VP_INST_DEST_WRITEMASK_MASK) == 0)
178 return NVS_FILE_TEMP;
179 return NVS_FILE_RESULT;
180 }
181 }
182
183 static unsigned int
184 NV20VPGetDestID(nvsFunc * shader, int merged)
185 {
186 int id;
187
188 switch (shader->GetDestFile(shader, merged)) {
189 case NVS_FILE_RESULT:
190 id = ((shader->inst[3] & NV20_VP_INST_DEST_MASK)
191 >> NV20_VP_INST_DEST_SHIFT);
192 switch (id) {
193 case NV20_VP_INST_DEST_POS : return NVS_FR_POSITION;
194 case NV20_VP_INST_DEST_COL0 : return NVS_FR_COL0;
195 case NV20_VP_INST_DEST_COL1 : return NVS_FR_COL1;
196 case NV20_VP_INST_DEST_TC(0): return NVS_FR_TEXCOORD0;
197 case NV20_VP_INST_DEST_TC(1): return NVS_FR_TEXCOORD1;
198 case NV20_VP_INST_DEST_TC(2): return NVS_FR_TEXCOORD2;
199 case NV20_VP_INST_DEST_TC(3): return NVS_FR_TEXCOORD3;
200 default:
201 return -1;
202 }
203 case NVS_FILE_ADDRESS:
204 return 0;
205 case NVS_FILE_TEMP:
206 id = ((shader->inst[3] & NV20_VP_INST_DEST_TEMP_ID_MASK)
207 >> NV20_VP_INST_DEST_TEMP_ID_SHIFT);
208 return id;
209 default:
210 return -1;
211 }
212 }
213
214 static unsigned int
215 NV20VPGetDestMask(nvsFunc * shader, int merged)
216 {
217 int hwmask, mask = 0;
218
219 /* Special handling for ARL - hardware only supports a
220 * 1-component address reg
221 */
222 if (shader->GetOpcode(shader, merged) == NVS_OP_ARL)
223 return SMASK_X;
224
225 if (shader->GetDestFile(shader, merged) == NVS_FILE_RESULT)
226 hwmask = (shader->inst[3] & NV20_VP_INST_DEST_WRITEMASK_MASK)
227 >> NV20_VP_INST_DEST_WRITEMASK_SHIFT;
228 else if (shader->GetOpcodeSlot(shader, merged))
229 hwmask = (shader->inst[3] & NV20_VP_INST_STEMP_WRITEMASK_MASK)
230 >> NV20_VP_INST_STEMP_WRITEMASK_SHIFT;
231 else
232 hwmask = (shader->inst[3] & NV20_VP_INST_VTEMP_WRITEMASK_MASK)
233 >> NV20_VP_INST_VTEMP_WRITEMASK_SHIFT;
234
235 if (hwmask & (1 << 3)) mask |= SMASK_X;
236 if (hwmask & (1 << 2)) mask |= SMASK_Y;
237 if (hwmask & (1 << 1)) mask |= SMASK_Z;
238 if (hwmask & (1 << 0)) mask |= SMASK_W;
239
240 return mask;
241 }
242
243 static unsigned int
244 NV20VPGetSourceHW(nvsFunc * shader, int merged, int pos)
245 {
246 struct _op_xlat *opr;
247 unsigned int src;
248
249 opr = shader->GetOPTXRec(shader, merged);
250 if (!opr)
251 return -1;
252
253 switch (opr->srcpos[pos]) {
254 case 0:
255 src = ((shader->inst[1] & NV20_VP_INST_SRC0H_MASK)
256 >> NV20_VP_INST_SRC0H_SHIFT)
257 << NV20_VP_SRC0_HIGH_SHIFT;
258 src |= ((shader->inst[2] & NV20_VP_INST_SRC0L_MASK)
259 >> NV20_VP_INST_SRC0L_SHIFT);
260 break;
261 case 1:
262 src = ((shader->inst[2] & NV20_VP_INST_SRC1_MASK)
263 >> NV20_VP_INST_SRC1_SHIFT);
264 break;
265 case 2:
266 src = ((shader->inst[2] & NV20_VP_INST_SRC2H_MASK)
267 >> NV20_VP_INST_SRC2H_SHIFT)
268 << NV20_VP_SRC2_HIGH_SHIFT;
269 src |= ((shader->inst[3] & NV20_VP_INST_SRC2L_MASK)
270 >> NV20_VP_INST_SRC2L_SHIFT);
271 break;
272 default:
273 src = -1;
274 }
275
276 return src;
277 }
278
279 static nvsRegFile
280 NV20VPGetSourceFile(nvsFunc * shader, int merged, int pos)
281 {
282 unsigned int src;
283 struct _op_xlat *opr;
284 int file;
285
286 opr = shader->GetOPTXRec(shader, merged);
287 if (!opr || opr->srcpos[pos] == -1)
288 return -1;
289
290 switch (opr->srcpos[pos]) {
291 case SPOS_ADDRESS:
292 return NVS_FILE_ADDRESS;
293 default:
294 src = NV20VPGetSourceHW(shader, merged, pos);
295 file = (src & NV20_VP_SRC_REG_TYPE_MASK) >> NV20_VP_SRC_REG_TYPE_SHIFT;
296
297 switch (file) {
298 case NV20_VP_SRC_REG_TYPE_TEMP : return NVS_FILE_TEMP;
299 case NV20_VP_SRC_REG_TYPE_INPUT: return NVS_FILE_ATTRIB;
300 case NV20_VP_SRC_REG_TYPE_CONST: return NVS_FILE_CONST;
301 default:
302 return NVS_FILE_UNKNOWN;
303 }
304 }
305 }
306
307 static int
308 NV20VPGetSourceID(nvsFunc * shader, int merged, int pos)
309 {
310 unsigned int src;
311
312 switch (shader->GetSourceFile(shader, merged, pos)) {
313 case NVS_FILE_TEMP:
314 src = shader->GetSourceHW(shader, merged, pos);
315 return ((src & NV20_VP_SRC_REG_TEMP_ID_MASK) >>
316 NV20_VP_SRC_REG_TEMP_ID_SHIFT);
317 case NVS_FILE_CONST:
318 return ((shader->inst[1] & NV20_VP_INST_CONST_SRC_MASK)
319 >> NV20_VP_INST_CONST_SRC_SHIFT);
320 case NVS_FILE_ATTRIB:
321 src = ((shader->inst[1] & NV20_VP_INST_INPUT_SRC_MASK)
322 >> NV20_VP_INST_INPUT_SRC_SHIFT);
323 switch (src) {
324 case NV20_VP_INST_INPUT_SRC_POS : return NVS_FR_POSITION;
325 case NV20_VP_INST_INPUT_SRC_COL0 : return NVS_FR_COL0;
326 case NV20_VP_INST_INPUT_SRC_COL1 : return NVS_FR_COL1;
327 case NV20_VP_INST_INPUT_SRC_TC(0): return NVS_FR_TEXCOORD0;
328 case NV20_VP_INST_INPUT_SRC_TC(1): return NVS_FR_TEXCOORD1;
329 case NV20_VP_INST_INPUT_SRC_TC(2): return NVS_FR_TEXCOORD2;
330 case NV20_VP_INST_INPUT_SRC_TC(3): return NVS_FR_TEXCOORD3;
331 default:
332 return NVS_FR_UNKNOWN;
333 }
334 default:
335 return -1;
336 }
337 }
338
339 static int
340 NV20VPGetSourceNegate(nvsFunc * shader, int merged, int pos)
341 {
342 unsigned int src;
343
344 src = shader->GetSourceHW(shader, merged, pos);
345
346 return ((src & NV20_VP_SRC_REG_NEGATE) ? 1 : 0);
347 }
348
349 static int
350 NV20VPGetSourceAbs(nvsFunc * shader, int merged, int pos)
351 {
352 /* NV20 can't do ABS on sources? Appears to be emulated with
353 * MAX reg, reg, -reg
354 */
355 return 0;
356 }
357
358 static void
359 NV20VPGetSourceSwizzle(nvsFunc * shader, int merged, int pos, nvsSwzComp *swz)
360 {
361 unsigned int src;
362 int swzbits;
363
364 src = shader->GetSourceHW(shader, merged, pos);
365 swzbits =
366 (src & NV20_VP_SRC_REG_SWZ_ALL_MASK) >> NV20_VP_SRC_REG_SWZ_ALL_SHIFT;
367 return NV20VPTXSwizzle(swzbits, swz);
368 }
369
370 static int
371 NV20VPGetSourceIndexed(nvsFunc * shader, int merged, int pos)
372 {
373 /* I don't think NV20 can index into attribs, at least no GL
374 * extension is exposed that will allow it.
375 */
376 if (shader->GetSourceFile(shader, merged, pos) != NVS_FILE_CONST)
377 return 0;
378 if (shader->inst[3] & NV20_VP_INST_INDEX_CONST)
379 return 1;
380 return 0;
381 }
382
383 static int
384 NV20VPGetAddressRegID(nvsFunc * shader)
385 {
386 /* Only 1 address reg */
387 return 0;
388 }
389
390 static nvsSwzComp
391 NV20VPGetAddressRegSwizzle(nvsFunc * shader)
392 {
393 /* Only A0.x available */
394 return NVS_SWZ_X;
395 }
396
397 void
398 NV20VPInitShaderFuncs(nvsFunc * shader)
399 {
400 MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_NOP, NVS_OP_NOP, -1, -1, -1);
401 MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_MOV, NVS_OP_MOV, 0, -1, -1);
402 MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_MUL, NVS_OP_MUL, 0, 1, -1);
403 MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_ADD, NVS_OP_ADD, 0, 2, -1);
404 MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_MAD, NVS_OP_MAD, 0, 1, 2);
405 MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_DP3, NVS_OP_DP3, 0, 1, -1);
406 MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_DPH, NVS_OP_DPH, 0, 1, -1);
407 MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_DP4, NVS_OP_DP4, 0, 1, -1);
408 MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_DST, NVS_OP_DST, 0, 1, -1);
409 MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_MIN, NVS_OP_MIN, 0, 1, -1);
410 MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_MAX, NVS_OP_MAX, 0, 1, -1);
411 MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_SLT, NVS_OP_SLT, 0, 1, -1);
412 MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_SGE, NVS_OP_SGE, 0, 1, -1);
413 MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_ARL, NVS_OP_ARL, 0, -1, -1);
414
415 MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_NOP, NVS_OP_NOP, -1, -1, -1);
416 MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_RCP, NVS_OP_RCP, 2, -1, -1);
417 MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_RCC, NVS_OP_RCC, 2, -1, -1);
418 MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_RSQ, NVS_OP_RSQ, 2, -1, -1);
419 MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_EXP, NVS_OP_EXP, 2, -1, -1);
420 MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_LOG, NVS_OP_LOG, 2, -1, -1);
421 MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_LIT, NVS_OP_LIT, 2, -1, -1);
422
423 shader->UploadToHW = NV20VPUploadToHW;
424 shader->UpdateConst = NV20VPUpdateConst;
425
426 shader->GetOPTXRec = NV20VPGetOPTXRec;
427 shader->GetOPTXFromSOP = NV20VPGetOPTXFromSOP;
428
429 shader->HasMergedInst = NV20VPHasMergedInst;
430 shader->IsLastInst = NV20VPIsLastInst;
431 shader->GetOffsetNext = NV20VPGetOffsetNext;
432 shader->GetOpcodeSlot = NV20VPGetOpcodeSlot;
433 shader->GetOpcode = NV20VPGetOpcode;
434 shader->GetOpcodeHW = NV20VPGetOpcodeHW;
435 shader->GetDestFile = NV20VPGetDestFile;
436 shader->GetDestID = NV20VPGetDestID;
437 shader->GetDestMask = NV20VPGetDestMask;
438 shader->GetSourceHW = NV20VPGetSourceHW;
439 shader->GetSourceFile = NV20VPGetSourceFile;
440 shader->GetSourceID = NV20VPGetSourceID;
441 shader->GetSourceNegate = NV20VPGetSourceNegate;
442 shader->GetSourceAbs = NV20VPGetSourceAbs;
443 shader->GetSourceSwizzle = NV20VPGetSourceSwizzle;
444 shader->GetSourceIndexed = NV20VPGetSourceIndexed;
445 shader->GetRelAddressRegID = NV20VPGetAddressRegID;
446 shader->GetRelAddressSwizzle = NV20VPGetAddressRegSwizzle;
447 }