nvc0: use tile flags in a way compatible with nouveau
[mesa.git] / src / gallium / drivers / nvc0 / nvc0_program.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "pipe/p_shader_tokens.h"
24 #include "pipe/p_defines.h"
25
26 #include "tgsi/tgsi_parse.h"
27 #include "tgsi/tgsi_util.h"
28 #include "tgsi/tgsi_dump.h"
29
30 #include "nvc0_context.h"
31 #include "nvc0_pc.h"
32
33 static unsigned
34 nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
35 {
36 unsigned mask = inst->Dst[0].Register.WriteMask;
37
38 switch (inst->Instruction.Opcode) {
39 case TGSI_OPCODE_COS:
40 case TGSI_OPCODE_SIN:
41 return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
42 case TGSI_OPCODE_DP3:
43 return 0x7;
44 case TGSI_OPCODE_DP4:
45 case TGSI_OPCODE_DPH:
46 case TGSI_OPCODE_KIL: /* WriteMask ignored */
47 return 0xf;
48 case TGSI_OPCODE_DST:
49 return mask & (c ? 0xa : 0x6);
50 case TGSI_OPCODE_EX2:
51 case TGSI_OPCODE_EXP:
52 case TGSI_OPCODE_LG2:
53 case TGSI_OPCODE_LOG:
54 case TGSI_OPCODE_POW:
55 case TGSI_OPCODE_RCP:
56 case TGSI_OPCODE_RSQ:
57 case TGSI_OPCODE_SCS:
58 return 0x1;
59 case TGSI_OPCODE_IF:
60 return 0x1;
61 case TGSI_OPCODE_LIT:
62 return 0xb;
63 case TGSI_OPCODE_TEX:
64 case TGSI_OPCODE_TXB:
65 case TGSI_OPCODE_TXL:
66 case TGSI_OPCODE_TXP:
67 {
68 const struct tgsi_instruction_texture *tex;
69
70 assert(inst->Instruction.Texture);
71 tex = &inst->Texture;
72
73 mask = 0x7;
74 if (inst->Instruction.Opcode != TGSI_OPCODE_TEX &&
75 inst->Instruction.Opcode != TGSI_OPCODE_TXD)
76 mask |= 0x8; /* bias, lod or proj */
77
78 switch (tex->Texture) {
79 case TGSI_TEXTURE_1D:
80 mask &= 0x9;
81 break;
82 case TGSI_TEXTURE_SHADOW1D:
83 mask &= 0x5;
84 break;
85 case TGSI_TEXTURE_2D:
86 mask &= 0xb;
87 break;
88 default:
89 break;
90 }
91 }
92 return mask;
93 case TGSI_OPCODE_XPD:
94 {
95 unsigned x = 0;
96 if (mask & 1) x |= 0x6;
97 if (mask & 2) x |= 0x5;
98 if (mask & 4) x |= 0x3;
99 return x;
100 }
101 default:
102 break;
103 }
104
105 return mask;
106 }
107
108 static void
109 nvc0_indirect_inputs(struct nvc0_translation_info *ti, int id)
110 {
111 int i, c;
112
113 for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
114 for (c = 0; c < 4; ++c)
115 ti->input_access[i][c] = id;
116
117 ti->indirect_inputs = TRUE;
118 }
119
120 static void
121 nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id)
122 {
123 int i, c;
124
125 for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
126 for (c = 0; c < 4; ++c)
127 ti->output_access[i][c] = id;
128
129 ti->indirect_outputs = TRUE;
130 }
131
132 static INLINE unsigned
133 nvc0_system_value_location(unsigned sn, unsigned si)
134 {
135 switch (sn) {
136 /*
137 case TGSI_SEMANTIC_VERTEXID:
138 return 0x2fc;
139 */
140 case TGSI_SEMANTIC_PRIMID:
141 return 0x60;
142 /*
143 case TGSI_SEMANTIC_LAYER_INDEX:
144 return 0x64;
145 case TGSI_SEMANTIC_VIEWPORT_INDEX:
146 return 0x68;
147 */
148 case TGSI_SEMANTIC_INSTANCEID:
149 return 0x2f8;
150 default:
151 assert(0);
152 return 0x000;
153 }
154 }
155
156 static INLINE unsigned
157 nvc0_varying_location(unsigned sn, unsigned si)
158 {
159 switch (sn) {
160 case TGSI_SEMANTIC_POSITION:
161 return 0x70;
162 case TGSI_SEMANTIC_COLOR:
163 return 0x280 + (si * 16); /* are these hard-wired ? */
164 case TGSI_SEMANTIC_BCOLOR:
165 return 0x2a0 + (si * 16);
166 case TGSI_SEMANTIC_FOG:
167 return 0x270;
168 case TGSI_SEMANTIC_PSIZE:
169 return 0x6c;
170 /*
171 case TGSI_SEMANTIC_PNTC:
172 return 0x2e0;
173 */
174 case TGSI_SEMANTIC_GENERIC:
175 assert(si < 31);
176 return 0x80 + (si * 16);
177 case TGSI_SEMANTIC_NORMAL:
178 return 0x360;
179 case TGSI_SEMANTIC_PRIMID:
180 return 0x40;
181 /*
182 case TGSI_SEMANTIC_CLIP_DISTANCE:
183 return 0x2c0 + (si * 4);
184 */
185 default:
186 assert(0);
187 return 0x000;
188 }
189 }
190
191 static INLINE unsigned
192 nvc0_interp_mode(const struct tgsi_full_declaration *decl)
193 {
194 unsigned mode;
195
196 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT)
197 mode = NVC0_INTERP_FLAT;
198 else
199 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
200 mode = NVC0_INTERP_PERSPECTIVE;
201 else
202 mode = NVC0_INTERP_LINEAR;
203
204 if (decl->Declaration.Centroid)
205 mode |= NVC0_INTERP_CENTROID;
206
207 return mode;
208 }
209
210 static void
211 prog_immediate(struct nvc0_translation_info *ti,
212 const struct tgsi_full_immediate *imm)
213 {
214 int c;
215 unsigned n = ti->immd32_nr++;
216
217 assert(ti->immd32_nr <= ti->scan.immediate_count);
218
219 for (c = 0; c < 4; ++c)
220 ti->immd32[n * 4 + c] = imm->u[c].Uint;
221
222 ti->immd32_ty[n] = imm->Immediate.DataType;
223 }
224
225 static boolean
226 prog_decl(struct nvc0_translation_info *ti,
227 const struct tgsi_full_declaration *decl)
228 {
229 unsigned i, c;
230 unsigned sn = TGSI_SEMANTIC_GENERIC;
231 unsigned si = 0;
232 const unsigned first = decl->Range.First;
233 const unsigned last = decl->Range.Last;
234
235 if (decl->Declaration.Semantic) {
236 sn = decl->Semantic.Name;
237 si = decl->Semantic.Index;
238 }
239
240 switch (decl->Declaration.File) {
241 case TGSI_FILE_INPUT:
242 for (i = first; i <= last; ++i) {
243 if (ti->prog->type == PIPE_SHADER_VERTEX) {
244 sn = TGSI_SEMANTIC_GENERIC;
245 si = i;
246 }
247 for (c = 0; c < 4; ++c)
248 ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
249
250 if (ti->prog->type == PIPE_SHADER_FRAGMENT)
251 ti->interp_mode[i] = nvc0_interp_mode(decl);
252 }
253 break;
254 case TGSI_FILE_OUTPUT:
255 for (i = first; i <= last; ++i, ++si) {
256 if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
257 si = i;
258 if (i == ti->fp_depth_output) {
259 ti->output_loc[i][2] = (ti->scan.num_outputs - 1) * 4;
260 } else {
261 if (i > ti->fp_depth_output)
262 si -= 1;
263 for (c = 0; c < 4; ++c)
264 ti->output_loc[i][c] = si * 4 + c;
265 }
266 } else {
267 for (c = 0; c < 4; ++c)
268 ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
269 }
270 }
271 break;
272 case TGSI_FILE_SYSTEM_VALUE:
273 ti->sysval_loc[i] = nvc0_system_value_location(sn, si);
274 assert(first == last);
275 break;
276 case TGSI_FILE_NULL:
277 case TGSI_FILE_CONSTANT:
278 case TGSI_FILE_TEMPORARY:
279 case TGSI_FILE_SAMPLER:
280 case TGSI_FILE_ADDRESS:
281 case TGSI_FILE_IMMEDIATE:
282 case TGSI_FILE_PREDICATE:
283 break;
284 default:
285 NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
286 return FALSE;
287 }
288 return TRUE;
289 }
290
291 static void
292 prog_inst(struct nvc0_translation_info *ti,
293 const struct tgsi_full_instruction *inst, int id)
294 {
295 const struct tgsi_dst_register *dst;
296 const struct tgsi_src_register *src;
297 int s, c, k;
298 unsigned mask;
299
300 if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
301 ti->subr[ti->num_subrs].first_insn = id - 1;
302 ti->subr[ti->num_subrs].id = ti->num_subrs + 1; /* id 0 is main program */
303 ++ti->num_subrs;
304 }
305
306 if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
307 dst = &inst->Dst[0].Register;
308
309 for (c = 0; c < 4; ++c) {
310 if (dst->Indirect)
311 nvc0_indirect_outputs(ti, id);
312 if (!(dst->WriteMask & (1 << c)))
313 continue;
314 ti->output_access[dst->Index][c] = id;
315 }
316
317 if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
318 inst->Src[0].Register.File == TGSI_FILE_INPUT &&
319 dst->Index == ti->edgeflag_out)
320 ti->prog->vp.edgeflag = inst->Src[0].Register.Index;
321 } else
322 if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
323 if (inst->Dst[0].Register.Indirect)
324 ti->require_stores = TRUE;
325 }
326
327 for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
328 src = &inst->Src[s].Register;
329 if (src->File == TGSI_FILE_TEMPORARY)
330 if (inst->Src[s].Register.Indirect)
331 ti->require_stores = TRUE;
332 if (src->File != TGSI_FILE_INPUT)
333 continue;
334 mask = nvc0_tgsi_src_mask(inst, s);
335
336 if (inst->Src[s].Register.Indirect)
337 nvc0_indirect_inputs(ti, id);
338
339 for (c = 0; c < 4; ++c) {
340 if (!(mask & (1 << c)))
341 continue;
342 k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
343 if (k <= TGSI_SWIZZLE_W)
344 ti->input_access[src->Index][k] = id;
345 }
346 }
347 }
348
349 /* Probably should introduce something like struct tgsi_function_declaration
350 * instead of trying to guess inputs/outputs.
351 */
352 static void
353 prog_subroutine_inst(struct nvc0_subroutine *subr,
354 const struct tgsi_full_instruction *inst)
355 {
356 const struct tgsi_dst_register *dst;
357 const struct tgsi_src_register *src;
358 int s, c, k;
359 unsigned mask;
360
361 for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
362 src = &inst->Src[s].Register;
363 if (src->File != TGSI_FILE_TEMPORARY)
364 continue;
365 mask = nvc0_tgsi_src_mask(inst, s);
366
367 for (c = 0; c < 4; ++c) {
368 k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
369
370 if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W)
371 if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32))))
372 subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32);
373 }
374 }
375
376 if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
377 dst = &inst->Dst[0].Register;
378
379 for (c = 0; c < 4; ++c)
380 if (dst->WriteMask & (1 << c))
381 subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32);
382 }
383 }
384
385 static int
386 nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
387 {
388 int i, c;
389 unsigned a;
390
391 for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
392 for (c = 0; c < 4; ++c, ++a)
393 if (ti->input_access[i][c])
394 vp->hdr[5 + a / 32] |= 1 << (a % 32); /* VP_ATTR_EN */
395 }
396
397 for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
398 a = (ti->output_loc[i][0] - 0x40) / 4;
399 for (c = 0; c < 4; ++c, ++a) {
400 if (!ti->output_access[i][c])
401 continue;
402 vp->hdr[13 + a / 32] |= 1 << (a % 32); /* VP_EXPORT_EN */
403 }
404 }
405
406 return 0;
407 }
408
409 static int
410 nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
411 {
412 vp->hdr[0] = 0x20461;
413 vp->hdr[4] = 0xff000;
414
415 return nvc0_vp_gp_gen_header(vp, ti);
416 }
417
418 static int
419 nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti)
420 {
421 unsigned max_output_verts, output_prim;
422 unsigned i;
423
424 gp->hdr[0] = 0x00021061;
425 gp->hdr[2] = 0x01000000;
426
427 for (i = 0; i < ti->scan.num_properties; ++i) {
428 switch (ti->scan.properties[i].name) {
429 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
430 output_prim = ti->scan.properties[i].data[0];
431 break;
432 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
433 max_output_verts = ti->scan.properties[i].data[0];
434 break;
435 default:
436 break;
437 }
438 }
439
440 switch (output_prim) {
441 case PIPE_PRIM_POINTS:
442 gp->hdr[3] = 0x01000000;
443 gp->hdr[0] |= 0xf0000000;
444 break;
445 case PIPE_PRIM_LINE_STRIP:
446 gp->hdr[3] = 0x06000000;
447 gp->hdr[0] |= 0x10000000;
448 break;
449 case PIPE_PRIM_TRIANGLE_STRIP:
450 gp->hdr[3] = 0x07000000;
451 gp->hdr[0] |= 0x10000000;
452 break;
453 default:
454 assert(0);
455 break;
456 }
457
458 gp->hdr[4] = max_output_verts & 0x1ff;
459
460 return nvc0_vp_gp_gen_header(gp, ti);
461 }
462
463 static int
464 nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti)
465 {
466 int i, c;
467 unsigned a, m;
468
469 fp->hdr[0] = 0x21462;
470 fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
471
472 if (ti->scan.uses_kill)
473 fp->hdr[0] |= 0x8000;
474 if (ti->scan.writes_z) {
475 fp->hdr[19] |= 0x2;
476 if (ti->scan.num_outputs > 2)
477 fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
478 } else {
479 if (ti->scan.num_outputs > 1)
480 fp->hdr[0] |= 0x8000; /* FP_MULTIPLE_COLOR_OUTPUTS */
481 }
482
483 for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
484 m = ti->interp_mode[i];
485 for (c = 0; c < 4; ++c) {
486 if (!ti->input_access[i][c])
487 continue;
488 a = ti->input_loc[i][c] / 2;
489 if ((a & ~7) == 0x70/2)
490 fp->hdr[5] |= 1 << (28 + (a & 7) / 2); /* FRAG_COORD_UMASK */
491 else
492 fp->hdr[4 + a / 32] |= m << (a % 32);
493 }
494 }
495
496 for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
497 if (i != ti->fp_depth_output)
498 fp->hdr[18] |= 0xf << ti->output_loc[i][0];
499 }
500
501 return 0;
502 }
503
504 static boolean
505 nvc0_prog_scan(struct nvc0_translation_info *ti)
506 {
507 struct nvc0_program *prog = ti->prog;
508 struct tgsi_parse_context parse;
509 int ret;
510 unsigned i;
511
512 #ifdef NOUVEAU_DEBUG
513 tgsi_dump(prog->pipe.tokens, 0);
514 #endif
515
516 tgsi_scan_shader(prog->pipe.tokens, &ti->scan);
517
518 if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
519 ti->fp_depth_output = 255;
520 for (i = 0; i < ti->scan.num_outputs; ++i)
521 if (ti->scan.output_semantic_name[i] == TGSI_SEMANTIC_POSITION)
522 ti->fp_depth_output = i;
523 }
524
525 ti->subr =
526 CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
527
528 ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
529 ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
530
531 ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0]));
532
533 tgsi_parse_init(&parse, prog->pipe.tokens);
534 while (!tgsi_parse_end_of_tokens(&parse)) {
535 tgsi_parse_token(&parse);
536
537 switch (parse.FullToken.Token.Type) {
538 case TGSI_TOKEN_TYPE_IMMEDIATE:
539 prog_immediate(ti, &parse.FullToken.FullImmediate);
540 break;
541 case TGSI_TOKEN_TYPE_DECLARATION:
542 prog_decl(ti, &parse.FullToken.FullDeclaration);
543 break;
544 case TGSI_TOKEN_TYPE_INSTRUCTION:
545 ti->insns[ti->num_insns] = parse.FullToken.FullInstruction;
546 prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->num_insns);
547 break;
548 default:
549 break;
550 }
551 }
552
553 for (i = 0; i < ti->num_subrs; ++i) {
554 unsigned pc = ti->subr[i].id;
555 while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
556 prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
557 }
558
559 switch (prog->type) {
560 case PIPE_SHADER_VERTEX:
561 ti->input_file = NV_FILE_MEM_A;
562 ti->output_file = NV_FILE_MEM_V;
563 ret = nvc0_vp_gen_header(prog, ti);
564 break;
565 /*
566 case PIPE_SHADER_TESSELLATION_CONTROL:
567 ret = nvc0_tcp_gen_header(ti);
568 break;
569 case PIPE_SHADER_TESSELLATION_EVALUATION:
570 ret = nvc0_tep_gen_header(ti);
571 break;
572 case PIPE_SHADER_GEOMETRY:
573 ret = nvc0_gp_gen_header(ti);
574 break;
575 */
576 case PIPE_SHADER_FRAGMENT:
577 ti->input_file = NV_FILE_MEM_V;
578 ti->output_file = NV_FILE_GPR;
579 ret = nvc0_fp_gen_header(prog, ti);
580 break;
581 default:
582 assert(!"unsupported program type");
583 ret = -1;
584 break;
585 }
586
587 assert(!ret);
588 return ret;
589 }
590
591 boolean
592 nvc0_program_translate(struct nvc0_program *prog)
593 {
594 struct nvc0_translation_info *ti;
595 int ret;
596
597 ti = CALLOC_STRUCT(nvc0_translation_info);
598 ti->prog = prog;
599
600 ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
601
602 ret = nvc0_prog_scan(ti);
603 if (ret) {
604 NOUVEAU_ERR("unsupported shader program\n");
605 goto out;
606 }
607
608 ret = nvc0_generate_code(ti);
609 if (ret)
610 NOUVEAU_ERR("shader translation failed\n");
611
612 {
613 unsigned i;
614 for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i)
615 debug_printf("HDR[%02lx] = 0x%08x\n",
616 i * sizeof(prog->hdr[0]), prog->hdr[i]);
617 }
618
619 out:
620 if (ti->immd32)
621 FREE(ti->immd32);
622 if (ti->immd32_ty)
623 FREE(ti->immd32_ty);
624 if (ti->insns)
625 FREE(ti->insns);
626 if (ti->subr)
627 FREE(ti->subr);
628 FREE(ti);
629 return ret ? FALSE : TRUE;
630 }
631
632 void
633 nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
634 {
635 if (prog->res)
636 nouveau_resource_free(&prog->res);
637
638 if (prog->code)
639 FREE(prog->code);
640 if (prog->relocs)
641 FREE(prog->relocs);
642
643 prog->translated = FALSE;
644 }