#include "pipe/p_shader_tokens.h"
#include "pipe/p_inlines.h"

#include "util/u_pack_color.h"

#include "draw/draw_context.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pipe.h"

#include "nv40_context.h"
#define NV40_SHADER_NO_FUCKEDNESS
#include "nv40_shader.h"

/* Simple, but crappy, swtnl path; hopefully we won't need to hit this
 * very often at all. Uses "quadro style" vertex submission + a fixed
 * vertex layout to avoid the need to generate a vertex program or
 * vtxfmt.
 */

struct nv40_render_stage {
        struct draw_stage stage;
        struct nv40_context *nv40;
        unsigned prim;
};

static INLINE struct nv40_render_stage *
nv40_render_stage(struct draw_stage *stage)
{
        return (struct nv40_render_stage *)stage;
}

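/* Push one draw-module vertex to the hardware as immediate-mode vertex
 * attribute methods, using the attribute layout built by
 * nv40_state_vtxfmt_validate() below.  fui() reinterprets a float's
 * bits as a uint32 for the push buffer.
 */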
static INLINE void
nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v)
{
        unsigned i;

        for (i = 0; i < nv40->swtnl.nr_attribs; i++) {
                unsigned idx = nv40->swtnl.draw[i];
                unsigned hw = nv40->swtnl.hw[i];

                switch (nv40->swtnl.emit[i]) {
                case EMIT_OMIT:
                        break;
                case EMIT_1F:
                        BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1);
                        OUT_RING (fui(v->data[idx][0]));
                        break;
                case EMIT_2F:
                        BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2);
                        OUT_RING (fui(v->data[idx][0]));
                        OUT_RING (fui(v->data[idx][1]));
                        break;
                case EMIT_3F:
                        BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3);
                        OUT_RING (fui(v->data[idx][0]));
                        OUT_RING (fui(v->data[idx][1]));
                        OUT_RING (fui(v->data[idx][2]));
                        break;
                case EMIT_4F:
                        BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4);
                        OUT_RING (fui(v->data[idx][0]));
                        OUT_RING (fui(v->data[idx][1]));
                        OUT_RING (fui(v->data[idx][2]));
                        OUT_RING (fui(v->data[idx][3]));
                        break;
                case EMIT_4UB:
                        BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1);
                        OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]),
                                           float_to_ubyte(v->data[idx][1]),
                                           float_to_ubyte(v->data[idx][2]),
                                           float_to_ubyte(v->data[idx][3])));
                        break;
                default:
                        assert(0);
                        break;
                }
        }
}

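/* Emit a single point, line or triangle, opening and closing hardware
 * BEGIN_END primitives as needed and flushing the push buffer when it
 * is close to full.
 */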
static INLINE void
nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
                 unsigned mode, unsigned count)
{
        struct nv40_render_stage *rs = nv40_render_stage(stage);
        struct nv40_context *nv40 = rs->nv40;
        struct nouveau_pushbuf *pb = nv40->screen->base.channel->pushbuf;
        unsigned i;

        /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */
        if (pb->remaining < ((count * 20) + 6)) {
                if (rs->prim != NV40TCL_BEGIN_END_STOP) {
                        NOUVEAU_ERR("AIII, missed flush\n");
                        assert(0);
                }
                FIRE_RING(NULL);
                nv40_state_emit(nv40);
        }

        /* Switch primitive modes if necessary */
        if (rs->prim != mode) {
                if (rs->prim != NV40TCL_BEGIN_END_STOP) {
                        BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
                        OUT_RING (NV40TCL_BEGIN_END_STOP);
                }

                BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
                OUT_RING (mode);
                rs->prim = mode;
        }

        /* Emit vertex data */
        for (i = 0; i < count; i++)
                nv40_render_vertex(nv40, prim->v[i]);

        /* If it's likely we'll need to empty the push buffer soon, finish
         * off the primitive now.
         */
        if (pb->remaining < ((count * 20) + 6)) {
                BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
                OUT_RING (NV40TCL_BEGIN_END_STOP);
                rs->prim = NV40TCL_BEGIN_END_STOP;
        }
}

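/* draw_stage callbacks: each hands the fixed vertex count for its
 * primitive type to nv40_render_prim().
 */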
static void
nv40_render_point(struct draw_stage *draw, struct prim_header *prim)
{
        nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, 1);
}

static void
nv40_render_line(struct draw_stage *draw, struct prim_header *prim)
{
        nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, 2);
}

static void
nv40_render_tri(struct draw_stage *draw, struct prim_header *prim)
{
        nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, 3);
}

static void
nv40_render_flush(struct draw_stage *draw, unsigned flags)
{
        struct nv40_render_stage *rs = nv40_render_stage(draw);
        struct nv40_context *nv40 = rs->nv40;

        if (rs->prim != NV40TCL_BEGIN_END_STOP) {
                BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
                OUT_RING (NV40TCL_BEGIN_END_STOP);
                rs->prim = NV40TCL_BEGIN_END_STOP;
        }
}

static void
nv40_render_reset_stipple_counter(struct draw_stage *draw)
{
}

static void
nv40_render_destroy(struct draw_stage *draw)
{
        FREE(draw);
}

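/* Append one hand-assembled MOV instruction to the vertex program.
 * The four data words are the raw NV40 vertex-program encoding of a
 * MOV; only the source register (in data[1]) and the destination and
 * write mask (in data[3]) vary per call.  'vor' names the hardware
 * output register written, for the output usage mask; callers pass ~0
 * to skip the mask update.
 */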
static INLINE void
emit_mov(struct nv40_vertex_program *vp,
         unsigned dst, unsigned src, unsigned vor, unsigned mask)
{
        struct nv40_vertex_program_exec *inst;

        vp->insns = realloc(vp->insns,
                            sizeof(struct nv40_vertex_program_exec) *
                            ++vp->nr_insns);
        inst = &vp->insns[vp->nr_insns - 1];

        inst->data[0] = 0x401f9c6c;
        inst->data[1] = 0x0040000d | (src << 8);
        inst->data[2] = 0x8106c083;
        inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13);
        inst->const_index = -1;
        inst->has_branch_offset = FALSE;

        vp->ir |= (1 << src);
        if (vor != ~0)
                vp->or |= (1 << vor);
}

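/* Build the fixed passthrough vertex program used by the swtnl path:
 * every attribute the layout below can emit is MOVed straight to the
 * corresponding vertex output.  Bit 0 of the final instruction's last
 * word is set to mark it as the end of the program.
 */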
static struct nv40_vertex_program *
create_drawvp(struct nv40_context *nv40)
{
        struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program);
        unsigned i;

        emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf);
        emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf);
        emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf);
        emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf);
        emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf);
        emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8);
        for (i = 0; i < 8; i++)
                emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf);

        vp->insns[vp->nr_insns - 1].data[3] |= 1;
        vp->translated = TRUE;
        return vp;
}

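/* Create the draw-module render stage that feeds software-transformed
 * vertices back to the hardware.
 */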
struct draw_stage *
nv40_draw_render_stage(struct nv40_context *nv40)
{
        struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage);

        if (!nv40->swtnl.vertprog)
                nv40->swtnl.vertprog = create_drawvp(nv40);

        render->nv40 = nv40;
        render->stage.draw = nv40->draw;
        render->stage.point = nv40_render_point;
        render->stage.line = nv40_render_line;
        render->stage.tri = nv40_render_tri;
        render->stage.flush = nv40_render_flush;
        render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter;
        render->stage.destroy = nv40_render_destroy;

        return &render->stage;
}

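/* Software-TNL draw entry point: validate state, map the vertex, index
 * and constant buffers for CPU access, run the draw module over them,
 * then unmap everything and flush.
 */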
boolean
nv40_draw_elements_swtnl(struct pipe_context *pipe,
                         struct pipe_buffer *idxbuf, unsigned idxbuf_size,
                         unsigned mode, unsigned start, unsigned count)
{
        struct nv40_context *nv40 = nv40_context(pipe);
        struct pipe_screen *pscreen = pipe->screen;
        unsigned i;
        void *map;

        if (!nv40_state_validate_swtnl(nv40))
                return FALSE;
        nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF);
        nv40_state_emit(nv40);

        for (i = 0; i < nv40->vtxbuf_nr; i++) {
                map = pipe_buffer_map(pscreen, nv40->vtxbuf[i].buffer,
                                      PIPE_BUFFER_USAGE_CPU_READ);
                draw_set_mapped_vertex_buffer(nv40->draw, i, map);
        }

        if (idxbuf) {
                map = pipe_buffer_map(pscreen, idxbuf,
                                      PIPE_BUFFER_USAGE_CPU_READ);
                draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map);
        } else {
                draw_set_mapped_element_buffer(nv40->draw, 0, NULL);
        }

        if (nv40->constbuf[PIPE_SHADER_VERTEX]) {
                const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX];

                map = pipe_buffer_map(pscreen,
                                      nv40->constbuf[PIPE_SHADER_VERTEX],
                                      PIPE_BUFFER_USAGE_CPU_READ);
                draw_set_mapped_constant_buffer(nv40->draw, map, nr);
        }

        draw_arrays(nv40->draw, mode, start, count);

        for (i = 0; i < nv40->vtxbuf_nr; i++)
                pipe_buffer_unmap(pscreen, nv40->vtxbuf[i].buffer);

        if (idxbuf)
                pipe_buffer_unmap(pscreen, idxbuf);

        if (nv40->constbuf[PIPE_SHADER_VERTEX])
                pipe_buffer_unmap(pscreen, nv40->constbuf[PIPE_SHADER_VERTEX]);

        draw_flush(nv40->draw);
        pipe->flush(pipe, 0, NULL);

        return TRUE;
}

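/* Record one attribute in the swtnl layout: which hardware attribute
 * slot it lands in, how it is emitted, and which draw vertex output it
 * is sourced from.
 */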
static INLINE void
emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit,
            unsigned semantic, unsigned index)
{
        unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index);
        unsigned a = nv40->swtnl.nr_attribs++;

        nv40->swtnl.hw[a] = hw;
        nv40->swtnl.emit[a] = emit;
        nv40->swtnl.draw[a] = draw_out;
}

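/* Build the swtnl attribute layout from the fragment program's inputs.
 * Colours are sent as packed bytes, texcoords as 4xfloat32 and fog as a
 * single float; position goes last, since writing vertex attribute 0 is
 * what provokes the vertex ("quadro style" submission, per the comment
 * at the top of this file).
 */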
static boolean
nv40_state_vtxfmt_validate(struct nv40_context *nv40)
{
        struct nv40_fragment_program *fp = nv40->fragprog;
        unsigned colour = 0, texcoords = 0, fog = 0, i;

        /* Determine needed fragprog inputs */
        for (i = 0; i < fp->info.num_inputs; i++) {
                switch (fp->info.input_semantic_name[i]) {
                case TGSI_SEMANTIC_POSITION:
                        break;
                case TGSI_SEMANTIC_COLOR:
                        colour |= (1 << fp->info.input_semantic_index[i]);
                        break;
                case TGSI_SEMANTIC_GENERIC:
                        texcoords |= (1 << fp->info.input_semantic_index[i]);
                        break;
                case TGSI_SEMANTIC_FOG:
                        fog = 1;
                        break;
                default:
                        assert(0);
                }
        }

        nv40->swtnl.nr_attribs = 0;

        /* Map draw vtxprog output to hw attribute IDs */
        for (i = 0; i < 2; i++) {
                if (!(colour & (1 << i)))
                        continue;
                emit_attrib(nv40, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i);
        }

        for (i = 0; i < 8; i++) {
                if (!(texcoords & (1 << i)))
                        continue;
                emit_attrib(nv40, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i);
        }

        if (fog) {
                emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0);
        }

        emit_attrib(nv40, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0);

        return FALSE;
}

344
345 struct nv40_state_entry nv40_state_vtxfmt = {
346 .validate = nv40_state_vtxfmt_validate,
347 .dirty = {
348 .pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG,
349 .hw = 0
350 }
351 };
352