/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"

#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_query_hw.h"

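/* Update context state that depends on the program bound to a stage:
 * reference the TLS area if the program needs scratch/local storage, and
 * (un)bind the program's immediates, which live in the code segment, as
 * constant buffer 14 of that stage.
 */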
static inline void
nvc0_program_update_context_state(struct nvc0_context *nvc0,
                                  struct nvc0_program *prog, int stage)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   if (prog && prog->need_tls) {
      const uint32_t flags = NV_VRAM_DOMAIN(&nvc0->screen->base) | NOUVEAU_BO_RDWR;
      if (!nvc0->state.tls_required)
         BCTX_REFN_bo(nvc0->bufctx_3d, TLS, flags, nvc0->screen->tls);
      nvc0->state.tls_required |= 1 << stage;
   } else {
      if (nvc0->state.tls_required == (1 << stage))
         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TLS);
      nvc0->state.tls_required &= ~(1 << stage);
   }

   if (prog && prog->immd_size) {
      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      /* NOTE: may overlap code of a different shader */
      PUSH_DATA (push, align(prog->immd_size, 0x100));
      PUSH_DATAh(push, nvc0->screen->text->offset + prog->immd_base);
      PUSH_DATA (push, nvc0->screen->text->offset + prog->immd_base);
      BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1);
      PUSH_DATA (push, (14 << 4) | 1); /* bind the immediates as c14[] */

      nvc0->state.c14_bound |= 1 << stage;
   } else if (nvc0->state.c14_bound & (1 << stage)) {
      BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1);
      PUSH_DATA (push, (14 << 4) | 0); /* unbind c14[] */

      nvc0->state.c14_bound &= ~(1 << stage);
   }
}

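/* Ensure the program has been translated to the hardware ISA and that its
 * code has been uploaded. Programs without code are valid as-is; they may
 * carry stream output info only.
 */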
static inline bool
nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
   if (prog->mem)
      return true;

   if (!prog->translated) {
      prog->translated = nvc0_program_translate(
         prog, nvc0->screen->base.device->chipset);
      if (!prog->translated)
         return false;
   }

   if (likely(prog->code_size))
      return nvc0_program_upload_code(nvc0, prog);
   return true; /* stream output info only */
}

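/* Validate and bind the vertex program (hardware shader slot 1). */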
void
nvc0_vertprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *vp = nvc0->vertprog;

   if (!nvc0_program_validate(nvc0, vp))
      return;
   nvc0_program_update_context_state(nvc0, vp, 0);

   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 2);
   PUSH_DATA (push, 0x11); /* (program type << 4) | enable */
   PUSH_DATA (push, vp->code_base);
   BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);
   PUSH_DATA (push, vp->num_gprs);

   // BEGIN_NVC0(push, NVC0_3D_(0x163c), 1);
   // PUSH_DATA (push, 0);
}

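/* Validate and bind the fragment program (hardware shader slot 5); also
 * forces per-sample interpolation when multiple shading samples are
 * requested, and keeps the early-z override in sync with the shader.
 */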
void
nvc0_fragprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *fp = nvc0->fragprog;

   fp->fp.sample_interp = nvc0->min_samples > 1;

   if (!nvc0_program_validate(nvc0, fp))
      return;
   nvc0_program_update_context_state(nvc0, fp, 4);

   if (fp->fp.early_z != nvc0->state.early_z_forced) {
      nvc0->state.early_z_forced = fp->fp.early_z;
      IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z);
   }

   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2);
   PUSH_DATA (push, 0x51); /* (program type << 4) | enable */
   PUSH_DATA (push, fp->code_base);
   BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
   PUSH_DATA (push, fp->num_gprs);

   BEGIN_NVC0(push, SUBC_3D(0x0360), 2); /* unknown methods, magic values */
   PUSH_DATA (push, 0x20164010);
   PUSH_DATA (push, 0x20);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1);
   PUSH_DATA (push, fp->flags[0]);
}

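/* Validate and bind the tessellation control program (slot 2); without a
 * valid TCP, fall back to the driver's empty one and leave the stage
 * disabled.
 */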
void
nvc0_tctlprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *tp = nvc0->tctlprog;

   if (tp && nvc0_program_validate(nvc0, tp)) {
      if (tp->tp.tess_mode != ~0) {
         BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
         PUSH_DATA (push, tp->tp.tess_mode);
      }
      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
      PUSH_DATA (push, 0x21);
      PUSH_DATA (push, tp->code_base);
      BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
      PUSH_DATA (push, tp->num_gprs);
   } else {
      tp = nvc0->tcp_empty;
      /* not a whole lot we can do to handle this failure */
      if (!nvc0_program_validate(nvc0, tp))
         assert(!"unable to validate empty tcp");
      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
      PUSH_DATA (push, 0x20); /* program type 2, but not enabled */
      PUSH_DATA (push, tp->code_base);
   }
   nvc0_program_update_context_state(nvc0, tp, 1);
}

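/* Validate and bind the tessellation evaluation program (slot 3), or
 * disable the stage; selection goes through a driver-defined macro.
 */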
void
nvc0_tevlprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *tp = nvc0->tevlprog;

   if (tp && nvc0_program_validate(nvc0, tp)) {
      if (tp->tp.tess_mode != ~0) {
         BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
         PUSH_DATA (push, tp->tp.tess_mode);
      }
      BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
      PUSH_DATA (push, 0x31); /* enable */
      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1);
      PUSH_DATA (push, tp->code_base);
      BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);
      PUSH_DATA (push, tp->num_gprs);
   } else {
      BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
      PUSH_DATA (push, 0x30); /* disable */
   }
   nvc0_program_update_context_state(nvc0, tp, 2);
}

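/* Validate and bind the geometry program (slot 4). A GP without code is
 * allowed for specifying stream output state only; in that case the stage
 * is disabled. The layer output source is switched to the GP when its
 * header says it writes a layer value.
 */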
void
nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *gp = nvc0->gmtyprog;

   if (gp)
      nvc0_program_validate(nvc0, gp);

   /* we allow GPs with no code for specifying stream output state only */
   if (gp && gp->code_size) {
      const bool gp_selects_layer = !!(gp->hdr[13] & (1 << 9));

      BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
      PUSH_DATA (push, 0x41); /* enable */
      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1);
      PUSH_DATA (push, gp->code_base);
      BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);
      PUSH_DATA (push, gp->num_gprs);
      BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
      PUSH_DATA (push, gp_selects_layer ? NVC0_3D_LAYER_USE_GP : 0);
   } else {
      IMMED_NVC0(push, NVC0_3D(LAYER), 0);
      BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
      PUSH_DATA (push, 0x40); /* disable */
   }
   nvc0_program_update_context_state(nvc0, gp, 3);
}

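/* Validate transform feedback state. The stream output info is taken from
 * the last enabled vertex processing stage (GP before TEP before VP).
 * Varying layout and strides are reprogrammed only when the info changes;
 * buffer bindings are refreshed only when the targets are dirty.
 */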
void
nvc0_tfb_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_transform_feedback_state *tfb;
   unsigned b;

   if (nvc0->gmtyprog)
      tfb = nvc0->gmtyprog->tfb;
   else if (nvc0->tevlprog)
      tfb = nvc0->tevlprog->tfb;
   else
      tfb = nvc0->vertprog->tfb;

   IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0);

   if (tfb && tfb != nvc0->state.tfb) {
      for (b = 0; b < 4; ++b) {
         if (tfb->varying_count[b]) {
            /* 4 varying indices are packed per 32-bit word */
            unsigned n = (tfb->varying_count[b] + 3) / 4;

            BEGIN_NVC0(push, NVC0_3D(TFB_STREAM(b)), 3);
            PUSH_DATA (push, tfb->stream[b]);
            PUSH_DATA (push, tfb->varying_count[b]);
            PUSH_DATA (push, tfb->stride[b]);
            BEGIN_NVC0(push, NVC0_3D(TFB_VARYING_LOCS(b, 0)), n);
            PUSH_DATAp(push, tfb->varying_index[b], n);

            if (nvc0->tfbbuf[b])
               nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b];
         } else {
            IMMED_NVC0(push, NVC0_3D(TFB_VARYING_COUNT(b)), 0);
         }
      }
   }
   nvc0->state.tfb = tfb;

   if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS))
      return;
   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TFB);

   for (b = 0; b < nvc0->num_tfbbufs; ++b) {
      struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]);
      struct nv04_resource *buf;

      if (!targ) {
         IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);
         continue;
      }

      if (tfb)
         targ->stride = tfb->stride[b];

      buf = nv04_resource(targ->pipe.buffer);

      BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR);

      if (!(nvc0->tfbbuf_dirty & (1 << b)))
         continue;

      /* if the buffer was already written to, wait for its query and let it
       * supply the starting offset; clean buffers start at offset 0 */
      if (!targ->clean)
         nvc0_hw_query_fifo_wait(push, nvc0_query(targ->pq));
      BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
      PUSH_DATA (push, 1);
      PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, targ->pipe.buffer_size);
      if (!targ->clean) {
         nvc0_hw_query_pushbuf_submit(push, nvc0_query(targ->pq), 0x4);
      } else {
         PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */
         targ->clean = false;
      }
   }
   for (; b < 4; ++b)
      IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);
}