/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"

#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_query_hw.h"

#include "nvc0/nvc0_compute.xml.h"

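/* Update per-stage context state that depends on the bound program: keep a
 * reference to the TLS buffer while any stage needs thread-local storage,
 * and bind (or unbind) the shader's immediates as constant buffer 14. */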
static inline void
nvc0_program_update_context_state(struct nvc0_context *nvc0,
                                  struct nvc0_program *prog, int stage)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   if (prog && prog->need_tls) {
      const uint32_t flags = NV_VRAM_DOMAIN(&nvc0->screen->base) | NOUVEAU_BO_RDWR;
      if (!nvc0->state.tls_required)
         BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TLS, flags, nvc0->screen->tls);
      nvc0->state.tls_required |= 1 << stage;
   } else {
      if (nvc0->state.tls_required == (1 << stage))
         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TLS);
      nvc0->state.tls_required &= ~(1 << stage);
   }

   if (prog && prog->immd_size) {
      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      /* NOTE: may overlap code of a different shader */
      PUSH_DATA (push, align(prog->immd_size, 0x100));
      PUSH_DATAh(push, nvc0->screen->text->offset + prog->immd_base);
      PUSH_DATA (push, nvc0->screen->text->offset + prog->immd_base);
      BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1);
      PUSH_DATA (push, (14 << 4) | 1);

      nvc0->state.c14_bound |= 1 << stage;
   } else
   if (nvc0->state.c14_bound & (1 << stage)) {
      BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1);
      PUSH_DATA (push, (14 << 4) | 0);

      nvc0->state.c14_bound &= ~(1 << stage);
   }
}

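/* Ensure the program is usable: translate it to the hardware ISA if that
 * has not happened yet, and upload the code unless it is already resident
 * (prog->mem set). */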
static inline bool
nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
   if (prog->mem)
      return true;

   if (!prog->translated) {
      prog->translated = nvc0_program_translate(
         prog, nvc0->screen->base.device->chipset, &nvc0->base.debug);
      if (!prog->translated)
         return false;
   }

   if (likely(prog->code_size))
      return nvc0_program_upload_code(nvc0, prog);
   return true; /* stream output info only */
}

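/* Upload the vertex program if necessary and select it on the hardware. */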
void
nvc0_vertprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *vp = nvc0->vertprog;

   if (!nvc0_program_validate(nvc0, vp))
      return;
   nvc0_program_update_context_state(nvc0, vp, 0);

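   /* SP_SELECT value is (shader type << 4) | enable bit, so 0x11 selects an
    * enabled vertex program in hardware stage slot 1. */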
   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 2);
   PUSH_DATA (push, 0x11);
   PUSH_DATA (push, vp->code_base);
   BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);
   PUSH_DATA (push, vp->num_gprs);

   // BEGIN_NVC0(push, NVC0_3D_(0x163c), 1);
   // PUSH_DATA (push, 0);
}

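/* Upload and select the fragment program. Some rasterizer state
 * (force_persample_interp, flat shading of explicitly interpolated colors)
 * is baked into the shader binary, so a change there frees fp->mem to force
 * a re-upload with the corresponding fixups applied. */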
void
nvc0_fragprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *fp = nvc0->fragprog;
   struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;

   if (fp->fp.force_persample_interp != rast->force_persample_interp) {
      /* Force the program to be reuploaded, which will trigger interp fixups
       * to get applied
       */
      if (fp->mem)
         nouveau_heap_free(&fp->mem);

      fp->fp.force_persample_interp = rast->force_persample_interp;
   }

   /* Shade model works well enough when both colors follow it. However if one
    * (or both) is explicitly set, then we have to go the patching route.
    */
   bool has_explicit_color = fp->fp.colors &&
      (((fp->fp.colors & 1) && !fp->fp.color_interp[0]) ||
       ((fp->fp.colors & 2) && !fp->fp.color_interp[1]));
   bool hwflatshade = false;
   if (has_explicit_color && fp->fp.flatshade != rast->flatshade) {
      /* Force re-upload */
      if (fp->mem)
         nouveau_heap_free(&fp->mem);

      fp->fp.flatshade = rast->flatshade;

      /* Always smooth-shade in this mode, the shader will decide on its own
       * when to flat-shade.
       */
   } else if (!has_explicit_color) {
      hwflatshade = rast->flatshade;

      /* No need to binary-patch the shader each time, make sure that it's set
       * up for the default behaviour.
       */
      fp->fp.flatshade = 0;
   }

   if (hwflatshade != nvc0->state.flatshade) {
      nvc0->state.flatshade = hwflatshade;
      BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
      PUSH_DATA (push, hwflatshade ? NVC0_3D_SHADE_MODEL_FLAT :
                                     NVC0_3D_SHADE_MODEL_SMOOTH);
   }

   if (fp->mem && !(nvc0->dirty_3d & NVC0_NEW_3D_FRAGPROG)) {
      return;
   }

   if (!nvc0_program_validate(nvc0, fp))
      return;
   nvc0_program_update_context_state(nvc0, fp, 4);

   if (fp->fp.early_z != nvc0->state.early_z_forced) {
      nvc0->state.early_z_forced = fp->fp.early_z;
      IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z);
   }

   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2);
   PUSH_DATA (push, 0x51);
   PUSH_DATA (push, fp->code_base);
   BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
   PUSH_DATA (push, fp->num_gprs);

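   /* 0x0360 has no named method in the headers (hence the raw SUBC_3D
    * offset); the values written are fixed magic numbers whose meaning is
    * undocumented. */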
   BEGIN_NVC0(push, SUBC_3D(0x0360), 2);
   PUSH_DATA (push, 0x20164010);
   PUSH_DATA (push, 0x20);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1);
   PUSH_DATA (push, fp->flags[0]);
}

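/* Upload and select the tessellation control program, or fall back to the
 * pre-built empty TCP so the stage still points at valid code. */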
void
nvc0_tctlprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *tp = nvc0->tctlprog;

   if (tp && nvc0_program_validate(nvc0, tp)) {
      if (tp->tp.tess_mode != ~0) {
         BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
         PUSH_DATA (push, tp->tp.tess_mode);
      }
      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
      PUSH_DATA (push, 0x21);
      PUSH_DATA (push, tp->code_base);
      BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
      PUSH_DATA (push, tp->num_gprs);
   } else {
      tp = nvc0->tcp_empty;
      /* not a whole lot we can do to handle this failure */
      if (!nvc0_program_validate(nvc0, tp))
         assert(!"unable to validate empty tcp");
      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
      PUSH_DATA (push, 0x20);
      PUSH_DATA (push, tp->code_base);
   }
   nvc0_program_update_context_state(nvc0, tp, 1);
}

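/* Upload and select the tessellation evaluation program; enabling and
 * disabling this stage goes through the TEP_SELECT macro rather than
 * SP_SELECT directly. */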
void
nvc0_tevlprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *tp = nvc0->tevlprog;

   if (tp && nvc0_program_validate(nvc0, tp)) {
      if (tp->tp.tess_mode != ~0) {
         BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
         PUSH_DATA (push, tp->tp.tess_mode);
      }
      BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
      PUSH_DATA (push, 0x31);
      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1);
      PUSH_DATA (push, tp->code_base);
      BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);
      PUSH_DATA (push, tp->num_gprs);
   } else {
      BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
      PUSH_DATA (push, 0x30);
   }
   nvc0_program_update_context_state(nvc0, tp, 2);
}

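/* Upload and select the geometry program. A GP without code is permitted
 * (it only carries stream output state) and leaves the stage disabled; a
 * code-bearing GP may also drive the layer output. */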
void
nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *gp = nvc0->gmtyprog;

   /* we allow GPs with no code for specifying stream output state only */
   if (gp && nvc0_program_validate(nvc0, gp) && gp->code_size) {
      const bool gp_selects_layer = !!(gp->hdr[13] & (1 << 9));

      BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
      PUSH_DATA (push, 0x41);
      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1);
      PUSH_DATA (push, gp->code_base);
      BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);
      PUSH_DATA (push, gp->num_gprs);
      BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
      PUSH_DATA (push, gp_selects_layer ? NVC0_3D_LAYER_USE_GP : 0);
   } else {
      IMMED_NVC0(push, NVC0_3D(LAYER), 0);
      BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
      PUSH_DATA (push, 0x40);
   }
   nvc0_program_update_context_state(nvc0, gp, 3);
}

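/* Upload the compute program and flush the compute code cache. */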
void
nvc0_compprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *cp = nvc0->compprog;

   if (cp && !nvc0_program_validate(nvc0, cp))
      return;

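   /* flush so the compute engine sees any freshly uploaded shader code */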
   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
}

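/* Validate transform feedback: reprogram the per-buffer varying layout when
 * the stream output descriptor changed, and rebind the target buffers. */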
void
nvc0_tfb_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_transform_feedback_state *tfb;
   unsigned b;

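   /* stream output state comes from the last enabled shader stage before
    * rasterization: GP if present, else TEP, else VP */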
   if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb;
   else
   if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb;
   else
      tfb = nvc0->vertprog->tfb;

   IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0);

   if (tfb && tfb != nvc0->state.tfb) {
      for (b = 0; b < 4; ++b) {
         if (tfb->varying_count[b]) {
            unsigned n = (tfb->varying_count[b] + 3) / 4;

            BEGIN_NVC0(push, NVC0_3D(TFB_STREAM(b)), 3);
            PUSH_DATA (push, tfb->stream[b]);
            PUSH_DATA (push, tfb->varying_count[b]);
            PUSH_DATA (push, tfb->stride[b]);
            BEGIN_NVC0(push, NVC0_3D(TFB_VARYING_LOCS(b, 0)), n);
            PUSH_DATAp(push, tfb->varying_index[b], n);

            if (nvc0->tfbbuf[b])
               nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b];
         } else {
            IMMED_NVC0(push, NVC0_3D(TFB_VARYING_COUNT(b)), 0);
         }
      }
   }
   nvc0->state.tfb = tfb;

   if (!(nvc0->dirty_3d & NVC0_NEW_3D_TFB_TARGETS))
      return;
   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TFB);

   for (b = 0; b < nvc0->num_tfbbufs; ++b) {
      struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]);
      struct nv04_resource *buf;

      if (!targ) {
         IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);
         continue;
      }

      if (tfb)
         targ->stride = tfb->stride[b];

      buf = nv04_resource(targ->pipe.buffer);

      BCTX_REFN(nvc0->bufctx_3d, 3D_TFB, buf, WR);

      if (!(nvc0->tfbbuf_dirty & (1 << b)))
         continue;

      if (!targ->clean)
         nvc0_hw_query_fifo_wait(nvc0, nvc0_query(targ->pq));
      nouveau_pushbuf_space(push, 0, 0, 1);
      BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
      PUSH_DATA (push, 1);
      PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, targ->pipe.buffer_size);
      if (!targ->clean) {
         nvc0_hw_query_pushbuf_submit(push, nvc0_query(targ->pq), 0x4);
      } else {
         PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */
         targ->clean = false;
      }
   }
   for (; b < 4; ++b)
      IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);
}