nv50,nvc0: add new texture and render target formats
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_program.h"
24 #include "nv50_pc.h"
25 #include "nv50_context.h"
26
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_parse.h"
29 #include "tgsi/tgsi_util.h"
30 #include "tgsi/tgsi_dump.h"
31
32 static INLINE unsigned
33 bitcount4(const uint32_t val)
34 {
35 static const unsigned cnt[16]
36 = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
37 return cnt[val & 0xf];
38 }
39
40 static unsigned
41 nv50_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
42 {
43 unsigned mask = inst->Dst[0].Register.WriteMask;
44
45 switch (inst->Instruction.Opcode) {
46 case TGSI_OPCODE_COS:
47 case TGSI_OPCODE_SIN:
48 return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
49 case TGSI_OPCODE_DP3:
50 return 0x7;
51 case TGSI_OPCODE_DP4:
52 case TGSI_OPCODE_DPH:
53 case TGSI_OPCODE_KIL: /* WriteMask ignored */
54 return 0xf;
55 case TGSI_OPCODE_DST:
56 return mask & (c ? 0xa : 0x6);
57 case TGSI_OPCODE_EX2:
58 case TGSI_OPCODE_EXP:
59 case TGSI_OPCODE_LG2:
60 case TGSI_OPCODE_LOG:
61 case TGSI_OPCODE_POW:
62 case TGSI_OPCODE_RCP:
63 case TGSI_OPCODE_RSQ:
64 case TGSI_OPCODE_SCS:
65 return 0x1;
66 case TGSI_OPCODE_IF:
67 return 0x1;
68 case TGSI_OPCODE_LIT:
69 return 0xb;
70 case TGSI_OPCODE_TEX:
71 case TGSI_OPCODE_TXB:
72 case TGSI_OPCODE_TXL:
73 case TGSI_OPCODE_TXP:
74 {
75 const struct tgsi_instruction_texture *tex;
76
77 assert(inst->Instruction.Texture);
78 tex = &inst->Texture;
79
80 mask = 0x7;
81 if (inst->Instruction.Opcode != TGSI_OPCODE_TEX &&
82 inst->Instruction.Opcode != TGSI_OPCODE_TXD)
83 mask |= 0x8; /* bias, lod or proj */
84
85 switch (tex->Texture) {
86 case TGSI_TEXTURE_1D:
87 mask &= 0x9;
88 break;
89 case TGSI_TEXTURE_SHADOW1D:
90 mask &= 0x5;
91 break;
92 case TGSI_TEXTURE_2D:
93 mask &= 0xb;
94 break;
95 default:
96 break;
97 }
98 }
99 return mask;
100 case TGSI_OPCODE_XPD:
101 {
102 unsigned x = 0;
103 if (mask & 1) x |= 0x6;
104 if (mask & 2) x |= 0x5;
105 if (mask & 4) x |= 0x3;
106 return x;
107 }
108 default:
109 break;
110 }
111
112 return mask;
113 }
114
115 static void
116 nv50_indirect_inputs(struct nv50_translation_info *ti, int id)
117 {
118 int i, c;
119
120 for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
121 for (c = 0; c < 4; ++c)
122 ti->input_access[i][c] = id;
123
124 ti->indirect_inputs = TRUE;
125 }
126
127 static void
128 nv50_indirect_outputs(struct nv50_translation_info *ti, int id)
129 {
130 int i, c;
131
132 for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
133 for (c = 0; c < 4; ++c)
134 ti->output_access[i][c] = id;
135
136 ti->indirect_outputs = TRUE;
137 }
138
139 static void
140 prog_inst(struct nv50_translation_info *ti,
141 const struct tgsi_full_instruction *inst, int id)
142 {
143 const struct tgsi_dst_register *dst;
144 const struct tgsi_src_register *src;
145 int s, c, k;
146 unsigned mask;
147
148 if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
149 ti->subr[ti->subr_nr].pos = id - 1;
150 ti->subr[ti->subr_nr].id = ti->subr_nr + 1; /* id 0 is main program */
151 ++ti->subr_nr;
152 }
153
154 if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
155 dst = &inst->Dst[0].Register;
156
157 for (c = 0; c < 4; ++c) {
158 if (dst->Indirect)
159 nv50_indirect_outputs(ti, id);
160 if (!(dst->WriteMask & (1 << c)))
161 continue;
162 ti->output_access[dst->Index][c] = id;
163 }
164
165 if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
166 inst->Src[0].Register.File == TGSI_FILE_INPUT &&
167 dst->Index == ti->edgeflag_out)
168 ti->p->vp.edgeflag = inst->Src[0].Register.Index;
169 } else
170 if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
171 if (inst->Dst[0].Register.Indirect)
172 ti->store_to_memory = TRUE;
173 }
174
175 for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
176 src = &inst->Src[s].Register;
177 if (src->File == TGSI_FILE_TEMPORARY)
178 if (inst->Src[s].Register.Indirect)
179 ti->store_to_memory = TRUE;
180 if (src->File != TGSI_FILE_INPUT)
181 continue;
182 mask = nv50_tgsi_src_mask(inst, s);
183
184 if (inst->Src[s].Register.Indirect)
185 nv50_indirect_inputs(ti, id);
186
187 for (c = 0; c < 4; ++c) {
188 if (!(mask & (1 << c)))
189 continue;
190 k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
191 if (k <= TGSI_SWIZZLE_W)
192 ti->input_access[src->Index][k] = id;
193 }
194 }
195 }
196
197 /* Probably should introduce something like struct tgsi_function_declaration
198 * instead of trying to guess inputs/outputs.
199 */
200 static void
201 prog_subroutine_inst(struct nv50_subroutine *subr,
202 const struct tgsi_full_instruction *inst)
203 {
204 const struct tgsi_dst_register *dst;
205 const struct tgsi_src_register *src;
206 int s, c, k;
207 unsigned mask;
208
209 for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
210 src = &inst->Src[s].Register;
211 if (src->File != TGSI_FILE_TEMPORARY)
212 continue;
213 mask = nv50_tgsi_src_mask(inst, s);
214
215 assert(!inst->Src[s].Register.Indirect);
216
217 for (c = 0; c < 4; ++c) {
218 k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
219
220 if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W)
221 if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32))))
222 subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32);
223 }
224 }
225
226 if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
227 dst = &inst->Dst[0].Register;
228
229 for (c = 0; c < 4; ++c)
230 if (dst->WriteMask & (1 << c))
231 subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32);
232 }
233 }
234
235 static void
236 prog_immediate(struct nv50_translation_info *ti,
237 const struct tgsi_full_immediate *imm)
238 {
239 int c;
240 unsigned n = ti->immd32_nr++;
241
242 assert(ti->immd32_nr <= ti->scan.immediate_count);
243
244 for (c = 0; c < 4; ++c)
245 ti->immd32[n * 4 + c] = imm->u[c].Uint;
246
247 ti->immd32_ty[n] = imm->Immediate.DataType;
248 }
249
250 static INLINE unsigned
251 translate_interpolate(const struct tgsi_full_declaration *decl)
252 {
253 unsigned mode;
254
255 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT)
256 mode = NV50_INTERP_FLAT;
257 else
258 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
259 mode = 0;
260 else
261 mode = NV50_INTERP_LINEAR;
262
263 if (decl->Declaration.Centroid)
264 mode |= NV50_INTERP_CENTROID;
265
266 return mode;
267 }
268
269 static void
270 prog_decl(struct nv50_translation_info *ti,
271 const struct tgsi_full_declaration *decl)
272 {
273 unsigned i, first, last, sn = 0, si = 0;
274
275 first = decl->Range.First;
276 last = decl->Range.Last;
277
278 if (decl->Declaration.Semantic) {
279 sn = decl->Semantic.Name;
280 si = decl->Semantic.Index;
281 }
282
283 switch (decl->Declaration.File) {
284 case TGSI_FILE_INPUT:
285 for (i = first; i <= last; ++i)
286 ti->interp_mode[i] = translate_interpolate(decl);
287
288 if (!decl->Declaration.Semantic)
289 break;
290
291 for (i = first; i <= last; ++i) {
292 ti->p->in[i].sn = sn;
293 ti->p->in[i].si = si;
294 }
295
296 switch (sn) {
297 case TGSI_SEMANTIC_FACE:
298 break;
299 case TGSI_SEMANTIC_COLOR:
300 if (ti->p->type == PIPE_SHADER_FRAGMENT)
301 ti->p->vp.bfc[si] = first;
302 break;
303 }
304 break;
305 case TGSI_FILE_OUTPUT:
306 if (!decl->Declaration.Semantic)
307 break;
308
309 for (i = first; i <= last; ++i) {
310 ti->p->out[i].sn = sn;
311 ti->p->out[i].si = si;
312 }
313
314 switch (sn) {
315 case TGSI_SEMANTIC_BCOLOR:
316 ti->p->vp.bfc[si] = first;
317 break;
318 case TGSI_SEMANTIC_PSIZE:
319 ti->p->vp.psiz = first;
320 break;
321 case TGSI_SEMANTIC_EDGEFLAG:
322 ti->edgeflag_out = first;
323 break;
324 default:
325 break;
326 }
327 break;
328 case TGSI_FILE_SYSTEM_VALUE:
329 /* For VP/GP inputs, they are put in s[] after the last normal input.
330 * Let sysval_map reflect the order of the sysvals in s[] and fixup later.
331 */
332 switch (decl->Semantic.Name) {
333 case TGSI_SEMANTIC_FACE:
334 break;
335 case TGSI_SEMANTIC_INSTANCEID:
336 ti->p->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
337 ti->sysval_map[first] = 2;
338 break;
339 case TGSI_SEMANTIC_PRIMID:
340 break;
341 /*
342 case TGSI_SEMANTIC_PRIMIDIN:
343 break;
344 case TGSI_SEMANTIC_VERTEXID:
345 break;
346 */
347 default:
348 break;
349 }
350 break;
351 case TGSI_FILE_CONSTANT:
352 ti->p->parm_size = MAX2(ti->p->parm_size, (last + 1) * 16);
353 break;
354 case TGSI_FILE_ADDRESS:
355 case TGSI_FILE_SAMPLER:
356 case TGSI_FILE_TEMPORARY:
357 break;
358 default:
359 assert(0);
360 break;
361 }
362 }
363
364 static int
365 nv50_vertprog_prepare(struct nv50_translation_info *ti)
366 {
367 struct nv50_program *p = ti->p;
368 int i, c;
369 unsigned num_inputs = 0;
370
371 ti->input_file = NV_FILE_MEM_S;
372 ti->output_file = NV_FILE_OUT;
373
374 for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
375 p->in[i].id = i;
376 p->in[i].hw = num_inputs;
377
378 for (c = 0; c < 4; ++c) {
379 if (!ti->input_access[i][c])
380 continue;
381 ti->input_map[i][c] = num_inputs++;
382 p->vp.attrs[(4 * i + c) / 32] |= 1 << ((i * 4 + c) % 32);
383 }
384 }
385
386 for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
387 p->out[i].id = i;
388 p->out[i].hw = p->max_out;
389
390 for (c = 0; c < 4; ++c) {
391 if (!ti->output_access[i][c])
392 continue;
393 ti->output_map[i][c] = p->max_out++;
394 p->out[i].mask |= 1 << c;
395 }
396 }
397
398 for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
399 switch (ti->sysval_map[i]) {
400 case 2:
401 if (!(ti->p->vp.attrs[2] & NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID))
402 ti->sysval_map[i] = 1;
403 ti->sysval_map[i] = (ti->sysval_map[i] - 1) + num_inputs;
404 break;
405 default:
406 break;
407 }
408 }
409
410 if (p->vp.psiz < 0x40)
411 p->vp.psiz = p->out[p->vp.psiz].hw;
412
413 return 0;
414 }
415
416 static int
417 nv50_fragprog_prepare(struct nv50_translation_info *ti)
418 {
419 struct nv50_program *p = ti->p;
420 int i, j, c;
421 unsigned nvary, nintp, depr;
422 unsigned n = 0, m = 0, skip = 0;
423 ubyte sn[16], si[16];
424
425 /* FP flags */
426
427 if (ti->scan.writes_z) {
428 p->fp.flags[1] = 0x11;
429 p->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
430 }
431
432 if (ti->scan.uses_kill)
433 p->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
434
435 /* FP inputs */
436
437 ti->input_file = NV_FILE_MEM_V;
438 ti->output_file = NV_FILE_GPR;
439
440 /* count non-flat inputs, save semantic info */
441 for (i = 0; i < p->in_nr; ++i) {
442 m += (ti->interp_mode[i] & NV50_INTERP_FLAT) ? 0 : 1;
443 sn[i] = p->in[i].sn;
444 si[i] = p->in[i].si;
445 }
446
447 /* reorder p->in[] so that non-flat inputs are first and
448 * kick out special inputs that don't use VP/GP_RESULT_MAP
449 */
450 nintp = 0;
451 for (i = 0; i < p->in_nr; ++i) {
452 if (sn[i] == TGSI_SEMANTIC_POSITION) {
453 for (c = 0; c < 4; ++c) {
454 ti->input_map[i][c] = nintp;
455 if (ti->input_access[i][c]) {
456 p->fp.interp |= 1 << (24 + c);
457 ++nintp;
458 }
459 }
460 skip++;
461 continue;
462 } else
463 if (sn[i] == TGSI_SEMANTIC_FACE) {
464 ti->input_map[i][0] = 255;
465 skip++;
466 continue;
467 }
468
469 j = (ti->interp_mode[i] & NV50_INTERP_FLAT) ? m++ : n++;
470
471 if (sn[i] == TGSI_SEMANTIC_COLOR)
472 p->vp.bfc[si[i]] = j;
473
474 p->in[j].linear = (ti->interp_mode[i] & NV50_INTERP_LINEAR) ? 1 : 0;
475 p->in[j].id = i;
476 p->in[j].sn = sn[i];
477 p->in[j].si = si[i];
478 }
479 assert(n <= m);
480 p->in_nr -= skip;
481
482 if (!(p->fp.interp & (8 << 24))) {
483 p->fp.interp |= (8 << 24);
484 ++nintp;
485 }
486
487 p->fp.colors = 4 << NV50_3D_MAP_SEMANTIC_0_FFC0_ID__SHIFT; /* after HPOS */
488
489 for (i = 0; i < p->in_nr; ++i) {
490 int j = p->in[i].id;
491 p->in[i].hw = nintp;
492
493 for (c = 0; c < 4; ++c) {
494 if (!ti->input_access[j][c])
495 continue;
496 p->in[i].mask |= 1 << c;
497 ti->input_map[j][c] = nintp++;
498 }
499 /* count color inputs */
500 if (i == p->vp.bfc[0] || i == p->vp.bfc[1])
501 p->fp.colors += bitcount4(p->in[i].mask) << 16;
502 }
503 nintp -= bitcount4(p->fp.interp >> 24); /* subtract position inputs */
504 nvary = nintp;
505 if (n < m)
506 nvary -= p->in[n].hw;
507
508 p->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
509 p->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;
510
511 /* FP outputs */
512
513 if (p->out_nr > (1 + (ti->scan.writes_z ? 1 : 0)))
514 p->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;
515
516 depr = p->out_nr;
517 for (i = 0; i < p->out_nr; ++i) {
518 p->out[i].id = i;
519 if (p->out[i].sn == TGSI_SEMANTIC_POSITION) {
520 depr = i;
521 continue;
522 }
523 p->out[i].hw = p->max_out;
524 p->out[i].mask = 0xf;
525
526 for (c = 0; c < 4; ++c)
527 ti->output_map[i][c] = p->max_out++;
528 }
529 if (depr < p->out_nr) {
530 p->out[depr].mask = 0x4;
531 p->out[depr].hw = ti->output_map[depr][2] = p->max_out++;
532 } else {
533 /* allowed values are 1, 4, 5, 8, 9, ... */
534 p->max_out = MAX2(4, p->max_out);
535 }
536
537 return 0;
538 }
539
540 static int
541 nv50_geomprog_prepare(struct nv50_translation_info *ti)
542 {
543 ti->input_file = NV_FILE_MEM_S;
544 ti->output_file = NV_FILE_OUT;
545
546 assert(0);
547 return 1;
548 }
549
550 static int
551 nv50_prog_scan(struct nv50_translation_info *ti)
552 {
553 struct nv50_program *p = ti->p;
554 struct tgsi_parse_context parse;
555 int ret, i;
556
557 p->vp.edgeflag = 0x40;
558 p->vp.psiz = 0x40;
559 p->vp.bfc[0] = 0x40;
560 p->vp.bfc[1] = 0x40;
561 p->gp.primid = 0x80;
562
563 tgsi_scan_shader(p->pipe.tokens, &ti->scan);
564
565 #if NV50_DEBUG & NV50_DEBUG_SHADER
566 tgsi_dump(p->pipe.tokens, 0);
567 #endif
568
569 ti->subr =
570 CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
571
572 ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
573 ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
574
575 ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0]));
576
577 tgsi_parse_init(&parse, p->pipe.tokens);
578 while (!tgsi_parse_end_of_tokens(&parse)) {
579 tgsi_parse_token(&parse);
580
581 switch (parse.FullToken.Token.Type) {
582 case TGSI_TOKEN_TYPE_IMMEDIATE:
583 prog_immediate(ti, &parse.FullToken.FullImmediate);
584 break;
585 case TGSI_TOKEN_TYPE_DECLARATION:
586 prog_decl(ti, &parse.FullToken.FullDeclaration);
587 break;
588 case TGSI_TOKEN_TYPE_INSTRUCTION:
589 ti->insns[ti->inst_nr] = parse.FullToken.FullInstruction;
590 prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->inst_nr);
591 break;
592 }
593 }
594
595 /* Scan to determine which registers are inputs/outputs of a subroutine. */
596 for (i = 0; i < ti->subr_nr; ++i) {
597 int pc = ti->subr[i].id;
598 while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
599 prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
600 }
601
602 p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1;
603 p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1;
604
605 switch (p->type) {
606 case PIPE_SHADER_VERTEX:
607 ret = nv50_vertprog_prepare(ti);
608 break;
609 case PIPE_SHADER_FRAGMENT:
610 ret = nv50_fragprog_prepare(ti);
611 break;
612 case PIPE_SHADER_GEOMETRY:
613 ret = nv50_geomprog_prepare(ti);
614 break;
615 default:
616 assert(!"unsupported program type");
617 ret = -1;
618 break;
619 }
620
621 assert(!ret);
622 return ret;
623 }
624
625 boolean
626 nv50_program_translate(struct nv50_program *p)
627 {
628 struct nv50_translation_info *ti;
629 int ret;
630
631 ti = CALLOC_STRUCT(nv50_translation_info);
632 ti->p = p;
633
634 ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
635
636 ret = nv50_prog_scan(ti);
637 if (ret) {
638 NOUVEAU_ERR("unsupported shader program\n");
639 goto out;
640 }
641
642 ret = nv50_generate_code(ti);
643 if (ret) {
644 NOUVEAU_ERR("error during shader translation\n");
645 goto out;
646 }
647
648 out:
649 if (ti->immd32)
650 FREE(ti->immd32);
651 if (ti->immd32_ty)
652 FREE(ti->immd32_ty);
653 if (ti->insns)
654 FREE(ti->insns);
655 if (ti->subr)
656 FREE(ti->subr);
657 FREE(ti);
658 return ret ? FALSE : TRUE;
659 }
660
661 void
662 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
663 {
664 if (p->res)
665 nouveau_resource_free(&p->res);
666
667 if (p->code)
668 FREE(p->code);
669
670 if (p->fixups)
671 FREE(p->fixups);
672
673 p->translated = FALSE;
674 }