gallium: Enable multiple constant buffers for vertex and geometry shaders.
[mesa.git] / src / gallium / drivers / nv40 / nv40_draw.c
#include "pipe/p_shader_tokens.h"
#include "pipe/p_inlines.h"

#include "util/u_pack_color.h"

#include "draw/draw_context.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pipe.h"

#include "nv40_context.h"
#define NV40_SHADER_NO_FUCKEDNESS
#include "nv40_shader.h"

/* Simple, but crappy, swtnl path; hopefully we won't need to hit this very
 * often at all. Uses "quadro style" vertex submission + a fixed vertex
 * layout to avoid the need to generate a vertex program or vtxfmt.
 */

struct nv40_render_stage {
	struct draw_stage stage;
	struct nv40_context *nv40;
	unsigned prim;
};

static INLINE struct nv40_render_stage *
nv40_render_stage(struct draw_stage *stage)
{
	return (struct nv40_render_stage *)stage;
}

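/* Emit a single vertex as immediate-mode vertex attribute methods, using the
 * attribute mapping built by nv40_state_vtxfmt_validate().
 */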
static INLINE void
nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v)
{
	struct nv40_screen *screen = nv40->screen;
	struct nouveau_channel *chan = screen->base.channel;
	struct nouveau_grobj *curie = screen->curie;
	unsigned i;

	for (i = 0; i < nv40->swtnl.nr_attribs; i++) {
		unsigned idx = nv40->swtnl.draw[i];
		unsigned hw = nv40->swtnl.hw[i];

		switch (nv40->swtnl.emit[i]) {
		case EMIT_OMIT:
			break;
		case EMIT_1F:
			BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_1F(hw), 1);
			OUT_RING  (chan, fui(v->data[idx][0]));
			break;
		case EMIT_2F:
			BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(hw), 2);
			OUT_RING  (chan, fui(v->data[idx][0]));
			OUT_RING  (chan, fui(v->data[idx][1]));
			break;
		case EMIT_3F:
			BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_3F_X(hw), 3);
			OUT_RING  (chan, fui(v->data[idx][0]));
			OUT_RING  (chan, fui(v->data[idx][1]));
			OUT_RING  (chan, fui(v->data[idx][2]));
			break;
		case EMIT_4F:
			BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4F_X(hw), 4);
			OUT_RING  (chan, fui(v->data[idx][0]));
			OUT_RING  (chan, fui(v->data[idx][1]));
			OUT_RING  (chan, fui(v->data[idx][2]));
			OUT_RING  (chan, fui(v->data[idx][3]));
			break;
		case EMIT_4UB:
			BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4UB(hw), 1);
			OUT_RING  (chan, pack_ub4(float_to_ubyte(v->data[idx][0]),
						  float_to_ubyte(v->data[idx][1]),
						  float_to_ubyte(v->data[idx][2]),
						  float_to_ubyte(v->data[idx][3])));
			break;
		default:
			assert(0);
			break;
		}
	}
}

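/* Emit one primitive of 'count' vertices in immediate mode, first making sure
 * the push buffer has enough room and that the hardware is in the right
 * begin/end state.
 */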
static INLINE void
nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
		 unsigned mode, unsigned count)
{
	struct nv40_render_stage *rs = nv40_render_stage(stage);
	struct nv40_context *nv40 = rs->nv40;

	struct nv40_screen *screen = nv40->screen;
	struct nouveau_channel *chan = screen->base.channel;
	struct nouveau_pushbuf *pb = chan->pushbuf;
	struct nouveau_grobj *curie = screen->curie;
	unsigned i;

	/* Ensure there's room for 4xfloat32 + potentially 3 begin/end */
	if (pb->remaining < ((count * 20) + 6)) {
		if (rs->prim != NV40TCL_BEGIN_END_STOP) {
			NOUVEAU_ERR("AIII, missed flush\n");
			assert(0);
		}
		FIRE_RING(chan);
		nv40_state_emit(nv40);
	}

	/* Switch primitive modes if necessary */
	if (rs->prim != mode) {
		if (rs->prim != NV40TCL_BEGIN_END_STOP) {
			BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
			OUT_RING  (chan, NV40TCL_BEGIN_END_STOP);
		}

		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
		OUT_RING  (chan, mode);
		rs->prim = mode;
	}

	/* Emit vertex data */
	for (i = 0; i < count; i++)
		nv40_render_vertex(nv40, prim->v[i]);

	/* If it's likely we'll need to empty the push buffer soon, finish
	 * off the primitive now.
	 */
	if (pb->remaining < ((count * 20) + 6)) {
		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
		OUT_RING  (chan, NV40TCL_BEGIN_END_STOP);
		rs->prim = NV40TCL_BEGIN_END_STOP;
	}
}

static void
nv40_render_point(struct draw_stage *draw, struct prim_header *prim)
{
	nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, 1);
}

static void
nv40_render_line(struct draw_stage *draw, struct prim_header *prim)
{
	nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, 2);
}

static void
nv40_render_tri(struct draw_stage *draw, struct prim_header *prim)
{
	nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, 3);
}

static void
nv40_render_flush(struct draw_stage *draw, unsigned flags)
{
	struct nv40_render_stage *rs = nv40_render_stage(draw);
	struct nv40_context *nv40 = rs->nv40;
	struct nv40_screen *screen = nv40->screen;
	struct nouveau_channel *chan = screen->base.channel;
	struct nouveau_grobj *curie = screen->curie;

	if (rs->prim != NV40TCL_BEGIN_END_STOP) {
		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
		OUT_RING  (chan, NV40TCL_BEGIN_END_STOP);
		rs->prim = NV40TCL_BEGIN_END_STOP;
	}
}

static void
nv40_render_reset_stipple_counter(struct draw_stage *draw)
{
}

static void
nv40_render_destroy(struct draw_stage *draw)
{
	FREE(draw);
}

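/* Append a raw, hand-encoded NV40 vertex program MOV instruction copying
 * input attribute 'src' to output register 'dst' with the given write mask.
 * 'vor' is the bit to set in the program's output mask (vp->or), or ~0 for
 * none; the input-read mask (vp->ir) is updated from 'src'.
 */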
static INLINE void
emit_mov(struct nv40_vertex_program *vp,
	 unsigned dst, unsigned src, unsigned vor, unsigned mask)
{
	struct nv40_vertex_program_exec *inst;

	vp->insns = realloc(vp->insns,
			    sizeof(struct nv40_vertex_program_exec) *
			    ++vp->nr_insns);
	inst = &vp->insns[vp->nr_insns - 1];

	inst->data[0] = 0x401f9c6c;
	inst->data[1] = 0x0040000d | (src << 8);
	inst->data[2] = 0x8106c083;
	inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13);
	inst->const_index = -1;
	inst->has_branch_offset = FALSE;

	vp->ir |= (1 << src);
	if (vor != ~0)
		vp->or |= (1 << vor);
}

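/* Build the fixed pass-through vertex program used by the swtnl path: it
 * simply moves the already-transformed position, colours, fog coordinate and
 * texcoords from the fixed input layout to the corresponding hardware
 * outputs.
 */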
static struct nv40_vertex_program *
create_drawvp(struct nv40_context *nv40)
{
	struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program);
	unsigned i;

	emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf);
	emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf);
	emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf);
	emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf);
	emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf);
	emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8);
	for (i = 0; i < 8; i++)
		emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf);

	/* Flag the final instruction as the end of the program */
	vp->insns[vp->nr_insns - 1].data[3] |= 1;
	vp->translated = TRUE;
	return vp;
}

struct draw_stage *
nv40_draw_render_stage(struct nv40_context *nv40)
{
	struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage);

	if (!nv40->swtnl.vertprog)
		nv40->swtnl.vertprog = create_drawvp(nv40);

	render->nv40 = nv40;
	render->stage.draw = nv40->draw;
	render->stage.point = nv40_render_point;
	render->stage.line = nv40_render_line;
	render->stage.tri = nv40_render_tri;
	render->stage.flush = nv40_render_flush;
	render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter;
	render->stage.destroy = nv40_render_destroy;

	return &render->stage;
}

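/* Software TnL entry point: validates swtnl state, maps the vertex, index
 * and vertex-shader constant buffers for the draw module, runs the draw,
 * then unmaps everything and flushes.
 */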
void
nv40_draw_elements_swtnl(struct pipe_context *pipe,
			 struct pipe_buffer *idxbuf, unsigned idxbuf_size,
			 unsigned mode, unsigned start, unsigned count)
{
	struct nv40_context *nv40 = nv40_context(pipe);
	struct pipe_screen *pscreen = pipe->screen;
	unsigned i;
	void *map;

	if (!nv40_state_validate_swtnl(nv40))
		return;
	nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF);
	nv40_state_emit(nv40);

	for (i = 0; i < nv40->vtxbuf_nr; i++) {
		map = pipe_buffer_map(pscreen, nv40->vtxbuf[i].buffer,
				      PIPE_BUFFER_USAGE_CPU_READ);
		draw_set_mapped_vertex_buffer(nv40->draw, i, map);
	}

	if (idxbuf) {
		map = pipe_buffer_map(pscreen, idxbuf,
				      PIPE_BUFFER_USAGE_CPU_READ);
		draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map);
	} else {
		draw_set_mapped_element_buffer(nv40->draw, 0, NULL);
	}

	if (nv40->constbuf[PIPE_SHADER_VERTEX]) {
		const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX];

		map = pipe_buffer_map(pscreen,
				      nv40->constbuf[PIPE_SHADER_VERTEX],
				      PIPE_BUFFER_USAGE_CPU_READ);
		draw_set_mapped_constant_buffer(nv40->draw, PIPE_SHADER_VERTEX, 0,
						map, nr);
	}

	draw_arrays(nv40->draw, mode, start, count);

	for (i = 0; i < nv40->vtxbuf_nr; i++)
		pipe_buffer_unmap(pscreen, nv40->vtxbuf[i].buffer);

	if (idxbuf)
		pipe_buffer_unmap(pscreen, idxbuf);

	if (nv40->constbuf[PIPE_SHADER_VERTEX])
		pipe_buffer_unmap(pscreen, nv40->constbuf[PIPE_SHADER_VERTEX]);

	draw_flush(nv40->draw);
	pipe->flush(pipe, 0, NULL);
}

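/* Record one swtnl attribute: which hardware vertex attribute to write (hw),
 * how to emit it (emit), and which draw-module vertex output feeds it.
 */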
static INLINE void
emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit,
	    unsigned semantic, unsigned index)
{
	unsigned draw_out = draw_find_shader_output(nv40->draw, semantic, index);
	unsigned a = nv40->swtnl.nr_attribs++;

	nv40->swtnl.hw[a] = hw;
	nv40->swtnl.emit[a] = emit;
	nv40->swtnl.draw[a] = draw_out;
}

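/* Build the swtnl attribute mapping from the fragment program's inputs: only
 * the colours, texcoords and fog coordinate the fragprog actually reads are
 * emitted, plus the position.
 */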
static boolean
nv40_state_vtxfmt_validate(struct nv40_context *nv40)
{
	struct nv40_fragment_program *fp = nv40->fragprog;
	unsigned colour = 0, texcoords = 0, fog = 0, i;

	/* Determine needed fragprog inputs */
	for (i = 0; i < fp->info.num_inputs; i++) {
		switch (fp->info.input_semantic_name[i]) {
		case TGSI_SEMANTIC_POSITION:
			break;
		case TGSI_SEMANTIC_COLOR:
			colour |= (1 << fp->info.input_semantic_index[i]);
			break;
		case TGSI_SEMANTIC_GENERIC:
			texcoords |= (1 << fp->info.input_semantic_index[i]);
			break;
		case TGSI_SEMANTIC_FOG:
			fog = 1;
			break;
		default:
			assert(0);
		}
	}

	nv40->swtnl.nr_attribs = 0;

	/* Map draw vtxprog output to hw attribute IDs */
	for (i = 0; i < 2; i++) {
		if (!(colour & (1 << i)))
			continue;
		emit_attrib(nv40, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i);
	}

	for (i = 0; i < 8; i++) {
		if (!(texcoords & (1 << i)))
			continue;
		emit_attrib(nv40, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i);
	}

	if (fog) {
		emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0);
	}

	emit_attrib(nv40, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0);

	return FALSE;
}

struct nv40_state_entry nv40_state_vtxfmt = {
	.validate = nv40_state_vtxfmt_validate,
	.dirty = {
		.pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG,
		.hw = 0
	}
};