nvc0: add support for PIPE_CAP_SAMPLE_SHADING
src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"

#include "nvc0/nvc0_context.h"

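/* Update per-stage context state that depends on the bound program: the
 * thread-local storage (TLS) buffer used for shader local memory, and the
 * constant buffer slot that exposes the program's immediates (c14). */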
static INLINE void
nvc0_program_update_context_state(struct nvc0_context *nvc0,
                                  struct nvc0_program *prog, int stage)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   if (prog && prog->need_tls) {
      const uint32_t flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
      if (!nvc0->state.tls_required)
         BCTX_REFN_bo(nvc0->bufctx_3d, TLS, flags, nvc0->screen->tls);
      nvc0->state.tls_required |= 1 << stage;
   } else {
      if (nvc0->state.tls_required == (1 << stage))
         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TLS);
      nvc0->state.tls_required &= ~(1 << stage);
   }

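   /* Immediates live in the code segment next to the shader code and are
    * bound as constant buffer 14 (hence c14_bound below). */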
   if (prog && prog->immd_size) {
      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      /* NOTE: may overlap code of a different shader */
      PUSH_DATA (push, align(prog->immd_size, 0x100));
      PUSH_DATAh(push, nvc0->screen->text->offset + prog->immd_base);
      PUSH_DATA (push, nvc0->screen->text->offset + prog->immd_base);
      BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1);
      PUSH_DATA (push, (14 << 4) | 1);

      nvc0->state.c14_bound |= 1 << stage;
   } else
   if (nvc0->state.c14_bound & (1 << stage)) {
      BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1);
      PUSH_DATA (push, (14 << 4) | 0);

      nvc0->state.c14_bound &= ~(1 << stage);
   }
}

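/* Make sure the program has been translated from TGSI and its code has been
 * uploaded to the screen's code segment. Returns FALSE on translation
 * failure. */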
static INLINE boolean
nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
   if (prog->mem)
      return TRUE;

   if (!prog->translated) {
      prog->translated = nvc0_program_translate(
         prog, nvc0->screen->base.device->chipset);
      if (!prog->translated)
         return FALSE;
   }

   if (likely(prog->code_size))
      return nvc0_program_upload_code(nvc0, prog);
   return TRUE; /* stream output info only */
}

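/* Bind the vertex program. The SP_SELECT value presumably encodes
 * (program type << 4) | enable bit, so 0x11 enables stage type 1 (VP). */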
void
nvc0_vertprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *vp = nvc0->vertprog;

   if (!nvc0_program_validate(nvc0, vp))
      return;
   nvc0_program_update_context_state(nvc0, vp, 0);

   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 2);
   PUSH_DATA (push, 0x11);
   PUSH_DATA (push, vp->code_base);
   BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);
   PUSH_DATA (push, vp->num_gprs);

   // BEGIN_NVC0(push, NVC0_3D_(0x163c), 1);
   // PUSH_DATA (push, 0);
}

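/* Bind the fragment program (stage type 5) and its related state. */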
void
nvc0_fragprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *fp = nvc0->fragprog;

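   /* When sample shading is requested (min_samples > 1), have the FP
    * interpolate its inputs at the sample position rather than the pixel
    * center; this is the core of PIPE_CAP_SAMPLE_SHADING support. */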
   fp->fp.sample_interp = nvc0->min_samples > 1;

   if (!nvc0_program_validate(nvc0, fp))
      return;
   nvc0_program_update_context_state(nvc0, fp, 4);

   if (fp->fp.early_z != nvc0->state.early_z_forced) {
      nvc0->state.early_z_forced = fp->fp.early_z;
      IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z);
   }

   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2);
   PUSH_DATA (push, 0x51);
   PUSH_DATA (push, fp->code_base);
   BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
   PUSH_DATA (push, fp->num_gprs);

   BEGIN_NVC0(push, SUBC_3D(0x0360), 2);
   PUSH_DATA (push, 0x20164010);
   PUSH_DATA (push, 0x20);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1);
   PUSH_DATA (push, fp->flags[0]);
}

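/* Bind or disable the tessellation control program (stage type 2); writing
 * 0x20 (enable bit clear) turns the stage off. */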
void
nvc0_tctlprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *tp = nvc0->tctlprog;

   if (tp && nvc0_program_validate(nvc0, tp)) {
      if (tp->tp.tess_mode != ~0) {
         BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
         PUSH_DATA (push, tp->tp.tess_mode);
      }
      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
      PUSH_DATA (push, 0x21);
      PUSH_DATA (push, tp->code_base);
      BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
      PUSH_DATA (push, tp->num_gprs);

      if (tp->tp.input_patch_size <= 32)
         IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), tp->tp.input_patch_size);
   } else {
      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
      PUSH_DATA (push, 0x20);
   }
   nvc0_program_update_context_state(nvc0, tp, 1);
}

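/* Bind or disable the tessellation evaluation program (stage type 3).
 * MACRO_TEP_SELECT invokes a driver-defined macro in the hardware's macro
 * engine (presumably handling the extra state involved in toggling the
 * stage) instead of a plain SP_SELECT(3) write. */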
void
nvc0_tevlprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *tp = nvc0->tevlprog;

   if (tp && nvc0_program_validate(nvc0, tp)) {
      if (tp->tp.tess_mode != ~0) {
         BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
         PUSH_DATA (push, tp->tp.tess_mode);
      }
      BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
      PUSH_DATA (push, 0x31);
      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1);
      PUSH_DATA (push, tp->code_base);
      BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);
      PUSH_DATA (push, tp->num_gprs);
   } else {
      BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
      PUSH_DATA (push, 0x30);
   }
   nvc0_program_update_context_state(nvc0, tp, 2);
}

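/* Bind or disable the geometry program (stage type 4). Bit 9 of hdr[13]
 * presumably marks a GP that writes the layer output, in which case it
 * drives layered rendering (LAYER_USE_GP). */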
void
nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *gp = nvc0->gmtyprog;

   if (gp)
      nvc0_program_validate(nvc0, gp);

   /* we allow GPs with no code for specifying stream output state only */
   if (gp && gp->code_size) {
      const boolean gp_selects_layer = !!(gp->hdr[13] & (1 << 9));

      BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
      PUSH_DATA (push, 0x41);
      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1);
      PUSH_DATA (push, gp->code_base);
      BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);
      PUSH_DATA (push, gp->num_gprs);
      BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
      PUSH_DATA (push, gp_selects_layer ? NVC0_3D_LAYER_USE_GP : 0);
   } else {
      IMMED_NVC0(push, NVC0_3D(LAYER), 0);
      BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
      PUSH_DATA (push, 0x40);
   }
   nvc0_program_update_context_state(nvc0, gp, 3);
}

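/* Validate transform feedback state: per-stream varying layouts and the
 * target buffer bindings. */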
void
nvc0_tfb_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_transform_feedback_state *tfb;
   unsigned b;

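   /* Stream output is fed by the last shader stage before rasterization:
    * the GP if present, else the TEP, else the VP. */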
   if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb;
   else
   if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb;
   else
      tfb = nvc0->vertprog->tfb;

   IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0);

   if (tfb && tfb != nvc0->state.tfb) {
      for (b = 0; b < 4; ++b) {
         if (tfb->varying_count[b]) {
            unsigned n = (tfb->varying_count[b] + 3) / 4;

            BEGIN_NVC0(push, NVC0_3D(TFB_STREAM(b)), 3);
            PUSH_DATA (push, 0);
            PUSH_DATA (push, tfb->varying_count[b]);
            PUSH_DATA (push, tfb->stride[b]);
            BEGIN_NVC0(push, NVC0_3D(TFB_VARYING_LOCS(b, 0)), n);
            PUSH_DATAp(push, tfb->varying_index[b], n);

            if (nvc0->tfbbuf[b])
               nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b];
         } else {
            IMMED_NVC0(push, NVC0_3D(TFB_VARYING_COUNT(b)), 0);
         }
      }
   }
   nvc0->state.tfb = tfb;

   if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS))
      return;
   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TFB);

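   /* (Re)bind the target buffers. For a buffer that is not clean, the fill
    * offset is presumably restored from its query object via
    * nvc0_query_pushbuf_submit() instead of being reset to zero. */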
   for (b = 0; b < nvc0->num_tfbbufs; ++b) {
      struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]);
      struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);

      if (tfb)
         targ->stride = tfb->stride[b];

      if (!(nvc0->tfbbuf_dirty & (1 << b)))
         continue;

      if (!targ->clean)
         nvc0_query_fifo_wait(push, targ->pq);
      BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
      PUSH_DATA (push, 1);
      PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, targ->pipe.buffer_size);
      if (!targ->clean) {
         nvc0_query_pushbuf_submit(push, targ->pq, 0x4);
      } else {
         PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */
         targ->clean = FALSE;
      }
      BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR);
   }
   for (; b < 4; ++b)
      IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);
}