nv50: hook up to new shader code generator
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_program.h"
24 #include "nv50_context.h"
25
26 #include "codegen/nv50_ir_driver.h"
27
/* Count the set bits in the low nibble of val; all higher bits are ignored. */
static INLINE unsigned
bitcount4(const uint32_t val)
{
   const uint32_t nibble = val & 0xf;
   unsigned bit;
   unsigned count = 0;

   for (bit = 0; bit < 4; ++bit)
      count += (nibble >> bit) & 1;

   return count;
}
35
36 static int
37 nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
38 {
39 struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
40 unsigned i, n, c;
41
42 n = 0;
43 for (i = 0; i < info->numInputs; ++i) {
44 prog->in[i].id = i;
45 prog->in[i].sn = info->in[i].sn;
46 prog->in[i].si = info->in[i].si;
47 prog->in[i].hw = n;
48 prog->in[i].mask = info->in[i].mask;
49
50 prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32);
51
52 for (c = 0; c < 4; ++c)
53 if (info->in[i].mask & (1 << c))
54 info->in[i].slot[c] = n++;
55 }
56 prog->in_nr = info->numInputs;
57
58 for (i = 0; i < info->numSysVals; ++i) {
59 switch (info->sv[i].sn) {
60 case TGSI_SEMANTIC_INSTANCEID:
61 prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID;
62 continue;
63 case TGSI_SEMANTIC_VERTEXID:
64 prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
65 prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12;
66 continue;
67 default:
68 break;
69 }
70 }
71 /* VertexID before InstanceID */
72 if (info->io.vertexId < info->numSysVals)
73 info->sv[info->io.vertexId].slot[0] = n++;
74 if (info->io.instanceId < info->numSysVals)
75 info->sv[info->io.instanceId].slot[0] = n++;
76
77 n = 0;
78 for (i = 0; i < info->numOutputs; ++i) {
79 switch (info->out[i].sn) {
80 case TGSI_SEMANTIC_PSIZE:
81 prog->vp.psiz = i;
82 break;
83 case TGSI_SEMANTIC_CLIPDIST:
84 prog->vp.clpd[info->out[i].si] = n;
85 break;
86 case TGSI_SEMANTIC_EDGEFLAG:
87 prog->vp.edgeflag = i;
88 break;
89 case TGSI_SEMANTIC_BCOLOR:
90 prog->vp.bfc[info->out[i].si] = i;
91 break;
92 default:
93 break;
94 }
95 prog->out[i].id = i;
96 prog->out[i].sn = info->out[i].sn;
97 prog->out[i].si = info->out[i].si;
98 prog->out[i].hw = n;
99 prog->out[i].mask = info->out[i].mask;
100
101 for (c = 0; c < 4; ++c)
102 if (info->out[i].mask & (1 << c))
103 info->out[i].slot[c] = n++;
104 }
105 prog->out_nr = info->numOutputs;
106 prog->max_out = n;
107
108 if (prog->vp.psiz < info->numOutputs)
109 prog->vp.psiz = prog->out[prog->vp.psiz].hw;
110
111 return 0;
112 }
113
114 static int
115 nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info)
116 {
117 struct nv50_program *prog = (struct nv50_program *)info->driverPriv;
118 unsigned i, n, m, c;
119 unsigned nvary;
120 unsigned nflat;
121 unsigned nintp = 0;
122
123 /* count recorded non-flat inputs */
124 for (m = 0, i = 0; i < info->numInputs; ++i) {
125 switch (info->in[i].sn) {
126 case TGSI_SEMANTIC_POSITION:
127 case TGSI_SEMANTIC_FACE:
128 continue;
129 default:
130 m += info->in[i].flat ? 0 : 1;
131 break;
132 }
133 }
134 /* careful: id may be != i in info->in[prog->in[i].id] */
135
136 /* Fill prog->in[] so that non-flat inputs are first and
137 * kick out special inputs that don't use the RESULT_MAP.
138 */
139 for (n = 0, i = 0; i < info->numInputs; ++i) {
140 if (info->in[i].sn == TGSI_SEMANTIC_POSITION) {
141 prog->fp.interp |= info->in[i].mask << 24;
142 for (c = 0; c < 4; ++c)
143 if (info->in[i].mask & (1 << c))
144 info->in[i].slot[c] = nintp++;
145 } else
146 if (info->in[i].sn == TGSI_SEMANTIC_FACE) {
147 info->in[i].slot[0] = 255;
148 } else {
149 unsigned j = info->in[i].flat ? m++ : n++;
150
151 if (info->in[i].sn == TGSI_SEMANTIC_COLOR)
152 prog->vp.bfc[info->in[i].si] = j;
153
154 prog->in[j].id = i;
155 prog->in[j].mask = info->in[i].mask;
156 prog->in[j].sn = info->in[i].sn;
157 prog->in[j].si = info->in[i].si;
158 prog->in[j].linear = info->in[i].linear;
159
160 prog->in_nr++;
161 }
162 }
163 if (!(prog->fp.interp & (8 << 24))) {
164 ++nintp;
165 prog->fp.interp |= 8 << 24;
166 }
167
168 for (i = 0; i < prog->in_nr; ++i) {
169 int j = prog->in[i].id;
170
171 prog->in[i].hw = nintp;
172 for (c = 0; c < 4; ++c)
173 if (info->in[i].mask & (1 << c))
174 info->in[j].slot[c] = nintp++;
175 }
176 /* (n == m) if m never increased, i.e. no flat inputs */
177 nflat = (n < m) ? (nintp - prog->in[n].hw) : 0;
178 nintp -= bitcount4(prog->fp.interp >> 24); /* subtract position inputs */
179 nvary = nintp - nflat;
180
181 prog->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
182 prog->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;
183
184 /* put front/back colors right after HPOS */
185 prog->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT;
186 for (i = 0; i < 2; ++i)
187 if (prog->vp.bfc[i] < 0x80)
188 prog->fp.colors += bitcount4(prog->in[prog->vp.bfc[i]].mask) << 16;
189
190 /* FP outputs */
191
192 if (info->prop.fp.numColourResults > 1)
193 prog->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;
194
195 for (i = 0; i < info->numOutputs; ++i) {
196 prog->out[i].id = i;
197 prog->out[i].sn = info->out[i].sn;
198 prog->out[i].si = info->out[i].si;
199 prog->out[i].mask = info->out[i].mask;
200
201 if (i == info->io.fragDepth || i == info->io.sampleMask)
202 continue;
203 prog->out[i].hw = info->out[i].si * 4;
204
205 for (c = 0; c < 4; ++c)
206 info->out[i].slot[c] = prog->out[i].hw + c;
207
208 prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4);
209 }
210
211 if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
212 info->out[info->io.sampleMask].slot[0] = prog->max_out++;
213
214 if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
215 info->out[info->io.fragDepth].slot[2] = prog->max_out++;
216
217 if (!prog->max_out)
218 prog->max_out = 4;
219
220 return 0;
221 }
222
223 static int
224 nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
225 {
226 switch (info->type) {
227 case PIPE_SHADER_VERTEX:
228 return nv50_vertprog_assign_slots(info);
229 case PIPE_SHADER_GEOMETRY:
230 return nv50_vertprog_assign_slots(info);
231 case PIPE_SHADER_FRAGMENT:
232 return nv50_fragprog_assign_slots(info);
233 default:
234 return -1;
235 }
236 }
237
238 boolean
239 nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
240 {
241 struct nv50_ir_prog_info *info;
242 int ret;
243
244 info = CALLOC_STRUCT(nv50_ir_prog_info);
245 if (!info)
246 return FALSE;
247
248 info->type = prog->type;
249 info->target = chipset;
250 info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
251 info->bin.source = (void *)prog->pipe.tokens;
252
253 info->io.genUserClip = prog->vp.clpd_nr;
254
255 info->assignSlots = nv50_program_assign_varying_slots;
256
257 prog->vp.bfc[0] = 0x80;
258 prog->vp.bfc[1] = 0x80;
259 prog->vp.clpd[0] = 0x80;
260 prog->vp.clpd[1] = 0x80;
261 prog->vp.psiz = 0x80;
262 prog->vp.edgeflag = 0x80;
263 prog->gp.primid = 0x80;
264
265 info->driverPriv = prog;
266
267 #ifdef DEBUG
268 info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
269 info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
270 #else
271 info->optLevel = 3;
272 #endif
273
274 ret = nv50_ir_generate_code(info);
275 if (ret) {
276 NOUVEAU_ERR("shader translation failed: %i\n", ret);
277 goto out;
278 }
279 prog->code = info->bin.code;
280 prog->code_size = info->bin.codeSize;
281 prog->fixups = info->bin.relocData;
282 prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
283
284 if (prog->type == PIPE_SHADER_FRAGMENT) {
285 if (info->prop.fp.writesDepth) {
286 prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
287 prog->fp.flags[1] = 0x11;
288 }
289 if (info->prop.fp.usesDiscard)
290 prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
291 }
292
293 out:
294 FREE(info);
295 return !ret;
296 }
297
298 boolean
299 nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
300 {
301 struct nouveau_heap *heap;
302 int ret;
303 uint32_t size = align(prog->code_size, 0x40);
304
305 switch (prog->type) {
306 case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break;
307 case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break;
308 case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break;
309 default:
310 assert(!"invalid program type");
311 return FALSE;
312 }
313
314 ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
315 if (ret) {
316 /* Out of space: evict everything to compactify the code segment, hoping
317 * the working set is much smaller and drifts slowly. Improve me !
318 */
319 while (heap->next) {
320 struct nv50_program *evict = heap->next->priv;
321 if (evict)
322 nouveau_heap_free(&evict->mem);
323 }
324 debug_printf("WARNING: out of code space, evicting all shaders.\n");
325 }
326 prog->code_base = prog->mem->start;
327
328 if (prog->fixups)
329 nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
330
331 nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
332 (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
333 NOUVEAU_BO_VRAM, prog->code_size, prog->code);
334
335 BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
336 PUSH_DATA (nv50->base.pushbuf, 0);
337
338 return TRUE;
339 }
340
341 void
342 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
343 {
344 const struct pipe_shader_state pipe = p->pipe;
345 const ubyte type = p->type;
346
347 if (p->mem)
348 nouveau_heap_free(&p->mem);
349
350 if (p->code)
351 FREE(p->code);
352
353 if (p->fixups)
354 FREE(p->fixups);
355
356 memset(p, 0, sizeof(*p));
357
358 p->pipe = pipe;
359 p->type = type;
360 }