gallium: updated prototype (missed in prev commit)
[mesa.git] / src / gallium / auxiliary / draw / draw_aapoint.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * AA point stage: AA points are converted to quads and rendered with a
30 * special fragment shader. Another approach would be to use a texture
31 * map image of a point, but experiments indicate the quality isn't nearly
32 * as good as this approach.
33 *
34 * Note: this looks a lot like draw_aaline.c but there's actually little
35 * if any code that can be shared.
36 *
37 * Authors: Brian Paul
38 */
39
40
41 #include "pipe/p_util.h"
42 #include "pipe/p_inlines.h"
43 #include "pipe/p_context.h"
44 #include "pipe/p_defines.h"
45 #include "pipe/p_shader_tokens.h"
46
47 #include "tgsi/util/tgsi_transform.h"
48 #include "tgsi/util/tgsi_dump.h"
49
50 #include "draw_context.h"
51 #include "draw_private.h"
52
53
/*
 * Enabling NORMALIZE might give _slightly_ better results.
 * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
 * d=x*x+y*y.  Since we're working with a unit circle, the latter seems
 * close enough and saves some costly instructions.
 */
60 #define NORMALIZE 0
61
62
63 /**
64 * Subclass of pipe_shader_state to carry extra fragment shader info.
65 */
66 struct aapoint_fragment_shader
67 {
68 struct pipe_shader_state state;
69 void *driver_fs; /**< the regular shader */
70 void *aapoint_fs; /**< the aa point-augmented shader */
71 };
72
73
74 /**
75 * Subclass of draw_stage
76 */
77 struct aapoint_stage
78 {
79 struct draw_stage stage;
80
81 int psize_slot;
82 float radius;
83
84 /** this is the vertex attrib slot for the new texcoords */
85 uint tex_slot;
86
87 /*
88 * Currently bound state
89 */
90 struct aapoint_fragment_shader *fs;
91
92 /*
93 * Driver interface/override functions
94 */
95 void * (*driver_create_fs_state)(struct pipe_context *,
96 const struct pipe_shader_state *);
97 void (*driver_bind_fs_state)(struct pipe_context *, void *);
98 void (*driver_delete_fs_state)(struct pipe_context *, void *);
99
100 struct pipe_context *pipe;
101 };
102
103
104
105 /**
106 * Subclass of tgsi_transform_context, used for transforming the
107 * user's fragment shader to add the special AA instructions.
108 */
109 struct aa_transform_context {
110 struct tgsi_transform_context base;
111 uint tempsUsed; /**< bitmask */
112 int colorOutput; /**< which output is the primary color */
113 int maxInput, maxGeneric; /**< max input index found */
114 int tmp0, colorTemp; /**< temp registers */
115 boolean firstInstruction;
116 };
117
118
119 /**
120 * TGSI declaration transform callback.
121 * Look for two free temp regs and available input reg for new texcoords.
122 */
123 static void
124 aa_transform_decl(struct tgsi_transform_context *ctx,
125 struct tgsi_full_declaration *decl)
126 {
127 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
128
129 if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
130 decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR &&
131 decl->Semantic.SemanticIndex == 0) {
132 aactx->colorOutput = decl->u.DeclarationRange.First;
133 }
134 else if (decl->Declaration.File == TGSI_FILE_INPUT) {
135 if ((int) decl->u.DeclarationRange.Last > aactx->maxInput)
136 aactx->maxInput = decl->u.DeclarationRange.Last;
137 if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC &&
138 (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) {
139 aactx->maxGeneric = decl->Semantic.SemanticIndex;
140 }
141 }
142 else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
143 uint i;
144 for (i = decl->u.DeclarationRange.First;
145 i <= decl->u.DeclarationRange.Last; i++) {
146 aactx->tempsUsed |= (1 << i);
147 }
148 }
149
150 ctx->emit_declaration(ctx, decl);
151 }
152
153
154 /**
155 * TGSI instruction transform callback.
156 * Replace writes to result.color w/ a temp reg.
157 * Upon END instruction, insert texture sampling code for antialiasing.
158 */
159 static void
160 aa_transform_inst(struct tgsi_transform_context *ctx,
161 struct tgsi_full_instruction *inst)
162 {
163 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
164 struct tgsi_full_instruction newInst;
165
166 if (aactx->firstInstruction) {
167 /* emit our new declarations before the first instruction */
168
169 struct tgsi_full_declaration decl;
170 const int texInput = aactx->maxInput + 1;
171 int tmp0;
172 uint i;
173
174 /* find two free temp regs */
175 for (i = 0; i < 32; i++) {
176 if ((aactx->tempsUsed & (1 << i)) == 0) {
177 /* found a free temp */
178 if (aactx->tmp0 < 0)
179 aactx->tmp0 = i;
180 else if (aactx->colorTemp < 0)
181 aactx->colorTemp = i;
182 else
183 break;
184 }
185 }
186
187 assert(aactx->colorTemp != aactx->tmp0);
188
189 tmp0 = aactx->tmp0;
190
191 /* declare new generic input/texcoord */
192 decl = tgsi_default_full_declaration();
193 decl.Declaration.File = TGSI_FILE_INPUT;
194 decl.Declaration.Semantic = 1;
195 decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
196 decl.Semantic.SemanticIndex = aactx->maxGeneric + 1;
197 decl.Declaration.Interpolate = 1;
198 /* XXX this could be linear... */
199 decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
200 decl.u.DeclarationRange.First =
201 decl.u.DeclarationRange.Last = texInput;
202 ctx->emit_declaration(ctx, &decl);
203
204 /* declare new temp regs */
205 decl = tgsi_default_full_declaration();
206 decl.Declaration.File = TGSI_FILE_TEMPORARY;
207 decl.u.DeclarationRange.First =
208 decl.u.DeclarationRange.Last = tmp0;
209 ctx->emit_declaration(ctx, &decl);
210
211 decl = tgsi_default_full_declaration();
212 decl.Declaration.File = TGSI_FILE_TEMPORARY;
213 decl.u.DeclarationRange.First =
214 decl.u.DeclarationRange.Last = aactx->colorTemp;
215 ctx->emit_declaration(ctx, &decl);
216
217 aactx->firstInstruction = FALSE;
218
219
220 /*
221 * Emit code to compute fragment coverage, kill if outside point radius
222 *
223 * Temp reg0 usage:
224 * t0.x = distance of fragment from center point
225 * t0.y = boolean, is t0.x > 1.0, also misc temp usage
226 * t0.z = temporary for computing 1/(1-k) value
227 * t0.w = final coverage value
228 */
229
230 /* MUL t0.xy, tex, tex; # compute x^2, y^2 */
231 newInst = tgsi_default_full_instruction();
232 newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
233 newInst.Instruction.NumDstRegs = 1;
234 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
235 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
236 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY;
237 newInst.Instruction.NumSrcRegs = 2;
238 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
239 newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
240 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
241 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
242 ctx->emit_instruction(ctx, &newInst);
243
244 /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */
245 newInst = tgsi_default_full_instruction();
246 newInst.Instruction.Opcode = TGSI_OPCODE_ADD;
247 newInst.Instruction.NumDstRegs = 1;
248 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
249 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
250 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
251 newInst.Instruction.NumSrcRegs = 2;
252 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
253 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
254 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
255 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
256 newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
257 newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
258 ctx->emit_instruction(ctx, &newInst);
259
260 #if NORMALIZE /* OPTIONAL normalization of length */
261 /* RSQ t0.x, t0.x; */
262 newInst = tgsi_default_full_instruction();
263 newInst.Instruction.Opcode = TGSI_OPCODE_RSQ;
264 newInst.Instruction.NumDstRegs = 1;
265 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
266 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
267 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
268 newInst.Instruction.NumSrcRegs = 1;
269 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
270 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
271 ctx->emit_instruction(ctx, &newInst);
272
273 /* RCP t0.x, t0.x; */
274 newInst = tgsi_default_full_instruction();
275 newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
276 newInst.Instruction.NumDstRegs = 1;
277 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
278 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
279 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
280 newInst.Instruction.NumSrcRegs = 1;
281 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
282 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
283 ctx->emit_instruction(ctx, &newInst);
284 #endif
285
286 /* SGT t0.y, t0.xxxx, t0.wwww; # bool b = d > 1 (NOTE t0.w == 1) */
287 newInst = tgsi_default_full_instruction();
288 newInst.Instruction.Opcode = TGSI_OPCODE_SGT;
289 newInst.Instruction.NumDstRegs = 1;
290 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
291 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
292 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
293 newInst.Instruction.NumSrcRegs = 2;
294 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
295 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
296 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
297 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
298 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
299 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
300 ctx->emit_instruction(ctx, &newInst);
301
302 /* KILP -t0.yyyy; # if b, KILL */
303 newInst = tgsi_default_full_instruction();
304 newInst.Instruction.Opcode = TGSI_OPCODE_KILP;
305 newInst.Instruction.NumDstRegs = 0;
306 newInst.Instruction.NumSrcRegs = 1;
307 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
308 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
309 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
310 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
311 newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
312 newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
313 newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
314 ctx->emit_instruction(ctx, &newInst);
315
316
317 /* compute coverage factor = (1-d)/(1-k) */
318
319 /* SUB t0.z, tex.w, tex.z; # m = 1 - k */
320 newInst = tgsi_default_full_instruction();
321 newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
322 newInst.Instruction.NumDstRegs = 1;
323 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
324 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
325 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
326 newInst.Instruction.NumSrcRegs = 2;
327 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
328 newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
329 newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
330 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
331 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
332 newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
333 ctx->emit_instruction(ctx, &newInst);
334
335 /* RCP t0.z, t0.z; # t0.z = 1 / m */
336 newInst = tgsi_default_full_instruction();
337 newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
338 newInst.Instruction.NumDstRegs = 1;
339 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
340 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
341 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
342 newInst.Instruction.NumSrcRegs = 1;
343 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
344 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
345 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z;
346 ctx->emit_instruction(ctx, &newInst);
347
348 /* SUB t0.y, 1, t0.x; # d = 1 - d */
349 newInst = tgsi_default_full_instruction();
350 newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
351 newInst.Instruction.NumDstRegs = 1;
352 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
353 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
354 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
355 newInst.Instruction.NumSrcRegs = 2;
356 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
357 newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
358 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
359 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
360 newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
361 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
362 ctx->emit_instruction(ctx, &newInst);
363
364 /* MUL t0.w, t0.y, t0.z; # coverage = d * m */
365 newInst = tgsi_default_full_instruction();
366 newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
367 newInst.Instruction.NumDstRegs = 1;
368 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
369 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
370 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
371 newInst.Instruction.NumSrcRegs = 2;
372 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
373 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
374 newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
375 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
376 newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
377 newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z;
378 ctx->emit_instruction(ctx, &newInst);
379
380 /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */
381 newInst = tgsi_default_full_instruction();
382 newInst.Instruction.Opcode = TGSI_OPCODE_SLE;
383 newInst.Instruction.NumDstRegs = 1;
384 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
385 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
386 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
387 newInst.Instruction.NumSrcRegs = 2;
388 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
389 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
390 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
391 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
392 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
393 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z;
394 ctx->emit_instruction(ctx, &newInst);
395
396 /* CMP t0.w, -t0.y, tex.w, t0.w;
397 * # if -t0.y < 0 then
398 * t0.w = 1
399 * else
400 * t0.w = t0.w
401 */
402 newInst = tgsi_default_full_instruction();
403 newInst.Instruction.Opcode = TGSI_OPCODE_CMP;
404 newInst.Instruction.NumDstRegs = 1;
405 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
406 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
407 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
408 newInst.Instruction.NumSrcRegs = 3;
409 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
410 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
411 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
412 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
413 newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
414 newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
415 newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
416 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
417 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
418 newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
419 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
420 newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
421 newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
422 newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
423 newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0;
424 newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
425 newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
426 newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
427 newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
428 ctx->emit_instruction(ctx, &newInst);
429
430 }
431
432 if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
433 /* add alpha modulation code at tail of program */
434
435 /* MOV result.color.xyz, colorTemp; */
436 newInst = tgsi_default_full_instruction();
437 newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
438 newInst.Instruction.NumDstRegs = 1;
439 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
440 newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
441 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ;
442 newInst.Instruction.NumSrcRegs = 1;
443 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
444 newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
445 ctx->emit_instruction(ctx, &newInst);
446
447 /* MUL result.color.w, colorTemp, tmp0.w; */
448 newInst = tgsi_default_full_instruction();
449 newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
450 newInst.Instruction.NumDstRegs = 1;
451 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
452 newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
453 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
454 newInst.Instruction.NumSrcRegs = 2;
455 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
456 newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
457 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
458 newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0;
459 ctx->emit_instruction(ctx, &newInst);
460 }
461 else {
462 /* Not an END instruction.
463 * Look for writes to result.color and replace with colorTemp reg.
464 */
465 uint i;
466
467 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
468 struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
469 if (dst->DstRegister.File == TGSI_FILE_OUTPUT &&
470 dst->DstRegister.Index == aactx->colorOutput) {
471 dst->DstRegister.File = TGSI_FILE_TEMPORARY;
472 dst->DstRegister.Index = aactx->colorTemp;
473 }
474 }
475 }
476
477 ctx->emit_instruction(ctx, inst);
478 }
479
480
481 /**
482 * Generate the frag shader we'll use for drawing AA lines.
483 * This will be the user's shader plus some texture/modulate instructions.
484 */
485 static void
486 generate_aapoint_fs(struct aapoint_stage *aapoint)
487 {
488 const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
489 struct draw_context *draw = aapoint->stage.draw;
490 struct pipe_shader_state aapoint_fs;
491 struct aa_transform_context transform;
492
493 #define MAX 1000
494
495 aapoint_fs = *orig_fs; /* copy to init */
496 aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
497
498 memset(&transform, 0, sizeof(transform));
499 transform.colorOutput = -1;
500 transform.maxInput = -1;
501 transform.maxGeneric = -1;
502 transform.colorTemp = -1;
503 transform.tmp0 = -1;
504 transform.firstInstruction = TRUE;
505 transform.base.transform_instruction = aa_transform_inst;
506 transform.base.transform_declaration = aa_transform_decl;
507
508 tgsi_transform_shader(orig_fs->tokens,
509 (struct tgsi_token *) aapoint_fs.tokens,
510 MAX, &transform.base);
511
512 #if 0 /* DEBUG */
513 tgsi_dump(orig_fs->tokens, 0);
514 tgsi_dump(aapoint_fs.tokens, 0);
515 #endif
516
517 aapoint_fs.input_semantic_name[aapoint_fs.num_inputs] = TGSI_SEMANTIC_GENERIC;
518 aapoint_fs.input_semantic_index[aapoint_fs.num_inputs] = transform.maxGeneric + 1;
519 aapoint_fs.num_inputs++;
520
521 aapoint->fs->aapoint_fs
522 = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs);
523
524 /* advertise the extra post-transform vertex attributes which will have
525 * the texcoords.
526 */
527 draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
528 draw->extra_vp_outputs.semantic_index = transform.maxGeneric + 1;
529 }
530
531
532 /**
533 * When we're about to draw our first AA line in a batch, this function is
534 * called to tell the driver to bind our modified fragment shader.
535 */
536 static void
537 bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
538 {
539 if (!aapoint->fs->aapoint_fs) {
540 generate_aapoint_fs(aapoint);
541 }
542 aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs);
543 }
544
545
546
547 static INLINE struct aapoint_stage *
548 aapoint_stage( struct draw_stage *stage )
549 {
550 return (struct aapoint_stage *) stage;
551 }
552
553
554 static void
555 passthrough_line(struct draw_stage *stage, struct prim_header *header)
556 {
557 stage->next->line(stage->next, header);
558 }
559
560
561 static void
562 passthrough_tri(struct draw_stage *stage, struct prim_header *header)
563 {
564 stage->next->tri(stage->next, header);
565 }
566
567
568 /**
569 * Draw an AA point by drawing a quad.
570 */
571 static void
572 aapoint_point(struct draw_stage *stage, struct prim_header *header)
573 {
574 const struct aapoint_stage *aapoint = aapoint_stage(stage);
575 struct prim_header tri;
576 struct vertex_header *v[4];
577 uint texPos = aapoint->tex_slot;
578 float radius, *pos, *tex;
579 uint i;
580 float k;
581
582 if (aapoint->psize_slot >= 0) {
583 radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
584 }
585 else {
586 radius = aapoint->radius;
587 }
588
589 /*
590 * Note: the texcoords (generic attrib, really) we use are special:
591 * The S and T components simply vary from -1 to +1.
592 * The R component is k, below.
593 * The Q component is 1.0 and will used as a handy constant in the
594 * fragment shader.
595 */
596
597 /*
598 * k is the threshold distance from the point's center at which
599 * we begin alpha attenuation (the coverage value).
600 * Operating within a unit circle, we'll compute the fragment's
601 * distance 'd' from the center point using the texcoords.
602 * IF d > 1.0 THEN
603 * KILL fragment
604 * ELSE IF d > k THEN
605 * compute coverage in [0,1] proportional to d in [k, 1].
606 * ELSE
607 * coverage = 1.0; // full coverage
608 * ENDIF
609 *
610 * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
611 * avoid using IF/ELSE/ENDIF TGSI opcodes.
612 */
613
614 #if !NORMALIZE
615 k = 1.0f / radius;
616 k = 1.0f - 2.0f * k + k * k;
617 #else
618 k = 1.0f - 1.0f / radius;
619 #endif
620
621 /* allocate/dup new verts */
622 for (i = 0; i < 4; i++) {
623 v[i] = dup_vert(stage, header->v[0], i);
624 }
625
626 /* new verts */
627 pos = v[0]->data[0];
628 pos[0] -= radius;
629 pos[1] -= radius;
630
631 pos = v[1]->data[0];
632 pos[0] += radius;
633 pos[1] -= radius;
634
635 pos = v[2]->data[0];
636 pos[0] += radius;
637 pos[1] += radius;
638
639 pos = v[3]->data[0];
640 pos[0] -= radius;
641 pos[1] += radius;
642
643 /* new texcoords */
644 tex = v[0]->data[texPos];
645 ASSIGN_4V(tex, -1, -1, k, 1);
646
647 tex = v[1]->data[texPos];
648 ASSIGN_4V(tex, 1, -1, k, 1);
649
650 tex = v[2]->data[texPos];
651 ASSIGN_4V(tex, 1, 1, k, 1);
652
653 tex = v[3]->data[texPos];
654 ASSIGN_4V(tex, -1, 1, k, 1);
655
656 /* emit 2 tris for the quad strip */
657 tri.v[0] = v[0];
658 tri.v[1] = v[1];
659 tri.v[2] = v[2];
660 stage->next->tri( stage->next, &tri );
661
662 tri.v[0] = v[0];
663 tri.v[1] = v[2];
664 tri.v[2] = v[3];
665 stage->next->tri( stage->next, &tri );
666 }
667
668
669 static void
670 aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
671 {
672 auto struct aapoint_stage *aapoint = aapoint_stage(stage);
673 struct draw_context *draw = stage->draw;
674
675 assert(draw->rasterizer->point_smooth);
676
677 if (draw->rasterizer->point_size <= 2.0)
678 aapoint->radius = 1.0;
679 else
680 aapoint->radius = 0.5f * draw->rasterizer->point_size;
681
682 aapoint->tex_slot = draw->num_vs_outputs;
683 assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
684 draw->extra_vp_outputs.slot = aapoint->tex_slot;
685
686 /*
687 * Bind our fragprog.
688 */
689 bind_aapoint_fragment_shader(aapoint);
690
691 /* find psize slot in post-transform vertex */
692 aapoint->psize_slot = -1;
693 if (draw->rasterizer->point_size_per_vertex) {
694 /* find PSIZ vertex output */
695 const struct draw_vertex_shader *vs = draw->vertex_shader;
696 uint i;
697 for (i = 0; i < vs->state->num_outputs; i++) {
698 if (vs->state->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
699 aapoint->psize_slot = i;
700 break;
701 }
702 }
703 }
704
705 /* now really draw first line */
706 stage->point = aapoint_point;
707 stage->point(stage, header);
708 }
709
710
711 static void
712 aapoint_flush(struct draw_stage *stage, unsigned flags)
713 {
714 struct draw_context *draw = stage->draw;
715 struct aapoint_stage *aapoint = aapoint_stage(stage);
716 struct pipe_context *pipe = aapoint->pipe;
717
718 stage->point = aapoint_first_point;
719 stage->next->flush( stage->next, flags );
720
721 /* restore original frag shader */
722 aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs);
723
724 draw->extra_vp_outputs.slot = 0;
725 }
726
727
728 static void
729 aapoint_reset_stipple_counter(struct draw_stage *stage)
730 {
731 stage->next->reset_stipple_counter( stage->next );
732 }
733
734
/**
 * Destroy the stage: release its temporary vertices, then the stage itself.
 */
static void
aapoint_destroy(struct draw_stage *stage)
{
   /* temp verts first; they hang off the stage being freed */
   draw_free_temp_verts( stage );

   FREE( stage );
}
741
742
743 static struct aapoint_stage *
744 draw_aapoint_stage(struct draw_context *draw)
745 {
746 struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
747
748 draw_alloc_temp_verts( &aapoint->stage, 4 );
749
750 aapoint->stage.draw = draw;
751 aapoint->stage.next = NULL;
752 aapoint->stage.point = aapoint_first_point;
753 aapoint->stage.line = passthrough_line;
754 aapoint->stage.tri = passthrough_tri;
755 aapoint->stage.flush = aapoint_flush;
756 aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
757 aapoint->stage.destroy = aapoint_destroy;
758
759 return aapoint;
760 }
761
762
/*
 * XXX temporary? solution to mapping a pipe_context to a aapoint_stage.
 *
 * A small fixed-size table of (pipe_context, aapoint_stage) pairs.
 * Entries are only ever appended; nothing in this file removes them.
 */

#define MAX_CONTEXTS 10

static struct pipe_context *Pipe[MAX_CONTEXTS];
static struct aapoint_stage *Stage[MAX_CONTEXTS];
static unsigned int NumContexts;

/**
 * Record that the given stage belongs to the given context.
 * If the table is already full the pair is not recorded: the assert fires
 * in debug builds, and other builds return instead of writing past the end
 * of the arrays.
 */
static void
add_aa_pipe_context(struct pipe_context *pipe, struct aapoint_stage *aa)
{
   assert(NumContexts < MAX_CONTEXTS);
   if (NumContexts >= MAX_CONTEXTS)
      return;

   Pipe[NumContexts] = pipe;
   Stage[NumContexts] = aa;
   NumContexts++;
}

/**
 * Find the stage registered for the given context.
 * \return the stage, or NULL (after asserting) if the context is unknown
 */
static struct aapoint_stage *
aapoint_stage_from_pipe(struct pipe_context *pipe)
{
   unsigned int i;
   for (i = 0; i < NumContexts; i++) {
      if (Pipe[i] == pipe)
         return Stage[i];
   }
   assert(0);
   return NULL;
}
793
794
795 /**
796 * This function overrides the driver's create_fs_state() function and
797 * will typically be called by the state tracker.
798 */
799 static void *
800 aapoint_create_fs_state(struct pipe_context *pipe,
801 const struct pipe_shader_state *fs)
802 {
803 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
804 struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
805
806 if (aafs) {
807 aafs->state = *fs;
808
809 /* pass-through */
810 aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs);
811 }
812
813 return aafs;
814 }
815
816
817 static void
818 aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
819 {
820 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
821 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
822 /* save current */
823 aapoint->fs = aafs;
824 /* pass-through */
825 aapoint->driver_bind_fs_state(aapoint->pipe, aafs->driver_fs);
826 }
827
828
829 static void
830 aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
831 {
832 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
833 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
834 /* pass-through */
835 aapoint->driver_delete_fs_state(aapoint->pipe, aafs->driver_fs);
836 FREE(aafs);
837 }
838
839
840 /**
841 * Called by drivers that want to install this AA point prim stage
842 * into the draw module's pipeline. This will not be used if the
843 * hardware has native support for AA points.
844 */
845 void
846 draw_install_aapoint_stage(struct draw_context *draw,
847 struct pipe_context *pipe)
848 {
849 struct aapoint_stage *aapoint;
850
851 /*
852 * Create / install AA point drawing / prim stage
853 */
854 aapoint = draw_aapoint_stage( draw );
855 assert(aapoint);
856 draw->pipeline.aapoint = &aapoint->stage;
857
858 aapoint->pipe = pipe;
859
860 /* save original driver functions */
861 aapoint->driver_create_fs_state = pipe->create_fs_state;
862 aapoint->driver_bind_fs_state = pipe->bind_fs_state;
863 aapoint->driver_delete_fs_state = pipe->delete_fs_state;
864
865 /* override the driver's functions */
866 pipe->create_fs_state = aapoint_create_fs_state;
867 pipe->bind_fs_state = aapoint_bind_fs_state;
868 pipe->delete_fs_state = aapoint_delete_fs_state;
869
870 add_aa_pipe_context(pipe, aapoint);
871 }