#include "pipe/p_shader_tokens.h"

#include "util/u_pack_color.h"

#include "draw/draw_context.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pipe.h"

#include "nv40_context.h"
#define NV40_SHADER_NO_FUCKEDNESS
#include "nv40_shader.h"

/* Simple, but crappy, swtnl path. Hopefully we won't need to hit this very
 * often at all. Uses "quadro style" vertex submission + a fixed vertex
 * layout to avoid the need to generate a vertex program or vtxfmt.
 */

struct nv40_render_stage {
        struct draw_stage stage;
        struct nv40_context *nv40;
        unsigned prim; /* hw primitive currently open, or NV40TCL_BEGIN_END_STOP */
};

static INLINE struct nv40_render_stage *
nv40_render_stage(struct draw_stage *stage)
{
        return (struct nv40_render_stage *)stage;
}

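/* Emit a single vertex as immediate-mode VTX_ATTR_* methods, using the
 * attribute list built by nv40_state_vtxfmt_validate() below.
 */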
static INLINE void
nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v)
{
        unsigned i;

        for (i = 0; i < nv40->swtnl.nr_attribs; i++) {
                unsigned idx = nv40->swtnl.draw[i];
                unsigned hw = nv40->swtnl.hw[i];

                switch (nv40->swtnl.emit[i]) {
                case EMIT_OMIT:
                        break;
                case EMIT_1F:
                        BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1);
                        OUT_RING  (fui(v->data[idx][0]));
                        break;
                case EMIT_2F:
                        BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2);
                        OUT_RING  (fui(v->data[idx][0]));
                        OUT_RING  (fui(v->data[idx][1]));
                        break;
                case EMIT_3F:
                        BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3);
                        OUT_RING  (fui(v->data[idx][0]));
                        OUT_RING  (fui(v->data[idx][1]));
                        OUT_RING  (fui(v->data[idx][2]));
                        break;
                case EMIT_4F:
                        BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4);
                        OUT_RING  (fui(v->data[idx][0]));
                        OUT_RING  (fui(v->data[idx][1]));
                        OUT_RING  (fui(v->data[idx][2]));
                        OUT_RING  (fui(v->data[idx][3]));
                        break;
                case EMIT_4UB:
                        BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1);
                        OUT_RING  (pack_ub4(float_to_ubyte(v->data[idx][0]),
                                            float_to_ubyte(v->data[idx][1]),
                                            float_to_ubyte(v->data[idx][2]),
                                            float_to_ubyte(v->data[idx][3])));
                        break;
                default:
                        assert(0);
                        break;
                }
        }
}

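/* Core of the fallback renderer: make sure the pushbuf has room for the
 * worst case, (re)start a hw primitive if the mode changed, then emit
 * each vertex of the primitive.
 */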
static INLINE void
nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
                 unsigned mode, unsigned count)
{
        struct nv40_render_stage *rs = nv40_render_stage(stage);
        struct nv40_context *nv40 = rs->nv40;
        struct nouveau_pushbuf *pb = nv40->nvws->channel->pushbuf;
        unsigned i;

        /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */
        if (pb->remaining < ((count * 20) + 6)) {
                if (rs->prim != NV40TCL_BEGIN_END_STOP) {
                        NOUVEAU_ERR("AIII, missed flush\n");
                        assert(0);
                }
                FIRE_RING(NULL);
                nv40_state_emit(nv40);
        }

        /* Switch primitive modes if necessary */
        if (rs->prim != mode) {
                if (rs->prim != NV40TCL_BEGIN_END_STOP) {
                        BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
                        OUT_RING  (NV40TCL_BEGIN_END_STOP);
                }

                BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
                OUT_RING  (mode);
                rs->prim = mode;
        }

        /* Emit vertex data */
        for (i = 0; i < count; i++)
                nv40_render_vertex(nv40, prim->v[i]);

        /* If it's likely we'll need to empty the push buffer soon, finish
         * off the primitive now.
         */
        if (pb->remaining < ((count * 20) + 6)) {
                BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
                OUT_RING  (NV40TCL_BEGIN_END_STOP);
                rs->prim = NV40TCL_BEGIN_END_STOP;
        }
}

static void
nv40_render_point(struct draw_stage *draw, struct prim_header *prim)
{
        nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, 1);
}

static void
nv40_render_line(struct draw_stage *draw, struct prim_header *prim)
{
        nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, 2);
}

static void
nv40_render_tri(struct draw_stage *draw, struct prim_header *prim)
{
        nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, 3);
}

static void
nv40_render_flush(struct draw_stage *draw, unsigned flags)
{
        struct nv40_render_stage *rs = nv40_render_stage(draw);
        struct nv40_context *nv40 = rs->nv40;

        if (rs->prim != NV40TCL_BEGIN_END_STOP) {
                BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
                OUT_RING  (NV40TCL_BEGIN_END_STOP);
                rs->prim = NV40TCL_BEGIN_END_STOP;
        }
}

static void
nv40_render_reset_stipple_counter(struct draw_stage *draw)
{
}

static void
nv40_render_destroy(struct draw_stage *draw)
{
        FREE(draw);
}

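/* Append one hand-encoded MOV to a vertex program. data[0..3] are the raw
 * NV40 vertex-program instruction words; the source input register is
 * patched into word 1 and the destination/write-mask into word 3. "vor"
 * is the hw output slot recorded in the program's output mask (~0 = none).
 */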
static INLINE void
emit_mov(struct nv40_vertex_program *vp,
         unsigned dst, unsigned src, unsigned vor, unsigned mask)
{
        struct nv40_vertex_program_exec *inst;

        vp->insns = realloc(vp->insns,
                            sizeof(struct nv40_vertex_program_exec) *
                            ++vp->nr_insns);
        inst = &vp->insns[vp->nr_insns - 1];

        inst->data[0] = 0x401f9c6c;
        inst->data[1] = 0x0040000d | (src << 8);
        inst->data[2] = 0x8106c083;
        inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13);
        inst->const_index = -1;
        inst->has_branch_offset = FALSE;

        vp->ir |= (1 << src);
        if (vor != ~0)
                vp->or |= (1 << vor);
}

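/* Build the fixed passthrough vertex program for this path: a MOV from
 * each attribute of the fixed swtnl vertex layout to the matching hw
 * result register.
 */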
static struct nv40_vertex_program *
create_drawvp(struct nv40_context *nv40)
{
        struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program);
        unsigned i;

        emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf);
        emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf);
        emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf);
        emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf);
        emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf);
        emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8);
        for (i = 0; i < 8; i++)
                emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf);

        /* Flag the final instruction as the end of the program */
        vp->insns[vp->nr_insns - 1].data[3] |= 1;
        vp->translated = TRUE;
        return vp;
}

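/* Create the draw_stage that sits at the end of the draw module's
 * pipeline and hands the software-T&L'd primitives to the hardware.
 */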
struct draw_stage *
nv40_draw_render_stage(struct nv40_context *nv40)
{
        struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage);

        if (!nv40->swtnl.vertprog)
                nv40->swtnl.vertprog = create_drawvp(nv40);

        render->nv40 = nv40;
        render->stage.draw = nv40->draw;
        render->stage.point = nv40_render_point;
        render->stage.line = nv40_render_line;
        render->stage.tri = nv40_render_tri;
        render->stage.flush = nv40_render_flush;
        render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter;
        render->stage.destroy = nv40_render_destroy;

        return &render->stage;
}

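/* Swtnl entry point: validate state, map the vertex/index/constant
 * buffers for the draw module, run the draw, then unmap and flush.
 */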
boolean
nv40_draw_elements_swtnl(struct pipe_context *pipe,
                         struct pipe_buffer *idxbuf, unsigned idxbuf_size,
                         unsigned mode, unsigned start, unsigned count)
{
        struct nv40_context *nv40 = nv40_context(pipe);
        struct pipe_winsys *ws = pipe->winsys;
        unsigned i;
        void *map;

        if (!nv40_state_validate_swtnl(nv40))
                return FALSE;
        nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF);
        nv40_state_emit(nv40);

        for (i = 0; i < nv40->vtxbuf_nr; i++) {
                map = ws->buffer_map(ws, nv40->vtxbuf[i].buffer,
                                     PIPE_BUFFER_USAGE_CPU_READ);
                draw_set_mapped_vertex_buffer(nv40->draw, i, map);
        }

        if (idxbuf) {
                map = ws->buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ);
                draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map);
        } else {
                draw_set_mapped_element_buffer(nv40->draw, 0, NULL);
        }

        if (nv40->constbuf[PIPE_SHADER_VERTEX]) {
                const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX];

                map = ws->buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX],
                                     PIPE_BUFFER_USAGE_CPU_READ);
                draw_set_mapped_constant_buffer(nv40->draw, map, nr);
        }

        draw_arrays(nv40->draw, mode, start, count);

        for (i = 0; i < nv40->vtxbuf_nr; i++)
                ws->buffer_unmap(ws, nv40->vtxbuf[i].buffer);

        if (idxbuf)
                ws->buffer_unmap(ws, idxbuf);

        if (nv40->constbuf[PIPE_SHADER_VERTEX])
                ws->buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]);

        draw_flush(nv40->draw);
        pipe->flush(pipe, 0, NULL);

        return TRUE;
}

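/* Record one attribute of the fixed vertex layout: the hw vertex attrib
 * it lands in, how it is emitted, and the draw vtxprog output feeding it.
 */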
static INLINE void
emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit,
            unsigned semantic, unsigned index)
{
        unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index);
        unsigned a = nv40->swtnl.nr_attribs++;

        nv40->swtnl.hw[a] = hw;
        nv40->swtnl.emit[a] = emit;
        nv40->swtnl.draw[a] = draw_out;
}

static boolean
nv40_state_vtxfmt_validate(struct nv40_context *nv40)
{
        struct nv40_fragment_program *fp = nv40->fragprog;
        unsigned colour = 0, texcoords = 0, fog = 0, i;

        /* Determine needed fragprog inputs */
        for (i = 0; i < fp->info.num_inputs; i++) {
                switch (fp->info.input_semantic_name[i]) {
                case TGSI_SEMANTIC_POSITION:
                        break;
                case TGSI_SEMANTIC_COLOR:
                        colour |= (1 << fp->info.input_semantic_index[i]);
                        break;
                case TGSI_SEMANTIC_GENERIC:
                        texcoords |= (1 << fp->info.input_semantic_index[i]);
                        break;
                case TGSI_SEMANTIC_FOG:
                        fog = 1;
                        break;
                default:
                        assert(0);
                }
        }

        nv40->swtnl.nr_attribs = 0;

        /* Map draw vtxprog output to hw attribute IDs */
        for (i = 0; i < 2; i++) {
                if (!(colour & (1 << i)))
                        continue;
                emit_attrib(nv40, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i);
        }

        for (i = 0; i < 8; i++) {
                if (!(texcoords & (1 << i)))
                        continue;
                emit_attrib(nv40, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i);
        }

        if (fog) {
                emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0);
        }

        /* Position goes last; writing hw attrib 0 is presumably what kicks
         * off emission of the vertex.
         */
        emit_attrib(nv40, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0);

        return FALSE;
}

/* Revalidate the vertex layout whenever the vertex arrays or the fragment
 * program (which decides the needed inputs) change.
 */
struct nv40_state_entry nv40_state_vtxfmt = {
        .validate = nv40_state_vtxfmt_validate,
        .dirty = {
                .pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG,
                .hw = 0
        }
};