c7f4349cb3e851d95f9da868c31a4d0ef78b4172
[mesa.git] / src / gallium / auxiliary / draw / draw_pipe_aapoint.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * AA point stage: AA points are converted to quads and rendered with a
30 * special fragment shader. Another approach would be to use a texture
31 * map image of a point, but experiments indicate the quality isn't nearly
32 * as good as this approach.
33 *
34 * Note: this looks a lot like draw_aaline.c but there's actually little
35 * if any code that can be shared.
36 *
37 * Authors: Brian Paul
38 */
39
40
41 #include "pipe/p_util.h"
42 #include "pipe/p_inlines.h"
43 #include "pipe/p_context.h"
44 #include "pipe/p_defines.h"
45 #include "pipe/p_shader_tokens.h"
46
47 #include "tgsi/tgsi_transform.h"
48 #include "tgsi/tgsi_dump.h"
49
50 #include "draw_context.h"
51 #include "draw_vs.h"
52 #include "draw_pipe.h"
53
54
/*
 * Enabling NORMALIZE might give _slightly_ better results.
 * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
 * d=x*x+y*y.  Since we're working with a unit circle, the latter seems
 * close enough and saves some costly instructions.
 *
 * The setting affects both the generated fragment shader code (optional
 * RSQ/RCP pair) and the attenuation threshold 'k' computed per point.
 */
#define NORMALIZE 0
62
63
/**
 * Subclass of pipe_shader_state to carry extra fragment shader info.
 *
 * One of these is allocated for every fragment shader created through the
 * overridden create_fs_state() hook, and it is what the state tracker
 * receives as the opaque shader handle.
 */
struct aapoint_fragment_shader
{
   struct pipe_shader_state state;  /**< copy of the shader as given to us */
   void *driver_fs;   /**< the regular shader */
   void *aapoint_fs;  /**< the aa point-augmented shader (NULL until generated) */
   int generic_attrib;  /**< The generic input attrib/texcoord we'll use */
};
74
75
/**
 * Subclass of draw_stage
 *
 * 'stage' must stay the first member: aapoint_stage() converts a
 * draw_stage pointer to an aapoint_stage pointer with a plain cast.
 */
struct aapoint_stage
{
   struct draw_stage stage;

   int psize_slot;   /**< vertex output slot holding PSIZE, or -1 if none */
   float radius;     /**< point radius used when there is no per-vertex size */

   /** this is the vertex attrib slot for the new texcoords */
   uint tex_slot;
   /** vertex attrib slot of the position, taken from the vertex shader info */
   uint pos_slot;

   /*
    * Currently bound state
    */
   struct aapoint_fragment_shader *fs;

   /*
    * Driver interface/override functions
    * (the driver's original entry points, saved when this stage is installed)
    */
   void * (*driver_create_fs_state)(struct pipe_context *,
                                    const struct pipe_shader_state *);
   void (*driver_bind_fs_state)(struct pipe_context *, void *);
   void (*driver_delete_fs_state)(struct pipe_context *, void *);

   /** the context whose fragment shader functions were overridden */
   struct pipe_context *pipe;
};
105
106
107
108 /**
109 * Subclass of tgsi_transform_context, used for transforming the
110 * user's fragment shader to add the special AA instructions.
111 */
112 struct aa_transform_context {
113 struct tgsi_transform_context base;
114 uint tempsUsed; /**< bitmask */
115 int colorOutput; /**< which output is the primary color */
116 int maxInput, maxGeneric; /**< max input index found */
117 int tmp0, colorTemp; /**< temp registers */
118 boolean firstInstruction;
119 };
120
121
122 /**
123 * TGSI declaration transform callback.
124 * Look for two free temp regs and available input reg for new texcoords.
125 */
126 static void
127 aa_transform_decl(struct tgsi_transform_context *ctx,
128 struct tgsi_full_declaration *decl)
129 {
130 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
131
132 if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
133 decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR &&
134 decl->Semantic.SemanticIndex == 0) {
135 aactx->colorOutput = decl->DeclarationRange.First;
136 }
137 else if (decl->Declaration.File == TGSI_FILE_INPUT) {
138 if ((int) decl->DeclarationRange.Last > aactx->maxInput)
139 aactx->maxInput = decl->DeclarationRange.Last;
140 if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC &&
141 (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) {
142 aactx->maxGeneric = decl->Semantic.SemanticIndex;
143 }
144 }
145 else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
146 uint i;
147 for (i = decl->DeclarationRange.First;
148 i <= decl->DeclarationRange.Last; i++) {
149 aactx->tempsUsed |= (1 << i);
150 }
151 }
152
153 ctx->emit_declaration(ctx, decl);
154 }
155
156
157 /**
158 * TGSI instruction transform callback.
159 * Replace writes to result.color w/ a temp reg.
160 * Upon END instruction, insert texture sampling code for antialiasing.
161 */
162 static void
163 aa_transform_inst(struct tgsi_transform_context *ctx,
164 struct tgsi_full_instruction *inst)
165 {
166 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
167 struct tgsi_full_instruction newInst;
168
169 if (aactx->firstInstruction) {
170 /* emit our new declarations before the first instruction */
171
172 struct tgsi_full_declaration decl;
173 const int texInput = aactx->maxInput + 1;
174 int tmp0;
175 uint i;
176
177 /* find two free temp regs */
178 for (i = 0; i < 32; i++) {
179 if ((aactx->tempsUsed & (1 << i)) == 0) {
180 /* found a free temp */
181 if (aactx->tmp0 < 0)
182 aactx->tmp0 = i;
183 else if (aactx->colorTemp < 0)
184 aactx->colorTemp = i;
185 else
186 break;
187 }
188 }
189
190 assert(aactx->colorTemp != aactx->tmp0);
191
192 tmp0 = aactx->tmp0;
193
194 /* declare new generic input/texcoord */
195 decl = tgsi_default_full_declaration();
196 decl.Declaration.File = TGSI_FILE_INPUT;
197 /* XXX this could be linear... */
198 decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
199 decl.Declaration.Semantic = 1;
200 decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
201 decl.Semantic.SemanticIndex = aactx->maxGeneric + 1;
202 decl.DeclarationRange.First =
203 decl.DeclarationRange.Last = texInput;
204 ctx->emit_declaration(ctx, &decl);
205
206 /* declare new temp regs */
207 decl = tgsi_default_full_declaration();
208 decl.Declaration.File = TGSI_FILE_TEMPORARY;
209 decl.DeclarationRange.First =
210 decl.DeclarationRange.Last = tmp0;
211 ctx->emit_declaration(ctx, &decl);
212
213 decl = tgsi_default_full_declaration();
214 decl.Declaration.File = TGSI_FILE_TEMPORARY;
215 decl.DeclarationRange.First =
216 decl.DeclarationRange.Last = aactx->colorTemp;
217 ctx->emit_declaration(ctx, &decl);
218
219 aactx->firstInstruction = FALSE;
220
221
222 /*
223 * Emit code to compute fragment coverage, kill if outside point radius
224 *
225 * Temp reg0 usage:
226 * t0.x = distance of fragment from center point
227 * t0.y = boolean, is t0.x > 1.0, also misc temp usage
228 * t0.z = temporary for computing 1/(1-k) value
229 * t0.w = final coverage value
230 */
231
232 /* MUL t0.xy, tex, tex; # compute x^2, y^2 */
233 newInst = tgsi_default_full_instruction();
234 newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
235 newInst.Instruction.NumDstRegs = 1;
236 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
237 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
238 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY;
239 newInst.Instruction.NumSrcRegs = 2;
240 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
241 newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
242 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
243 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
244 ctx->emit_instruction(ctx, &newInst);
245
246 /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */
247 newInst = tgsi_default_full_instruction();
248 newInst.Instruction.Opcode = TGSI_OPCODE_ADD;
249 newInst.Instruction.NumDstRegs = 1;
250 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
251 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
252 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
253 newInst.Instruction.NumSrcRegs = 2;
254 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
255 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
256 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
257 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
258 newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
259 newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
260 ctx->emit_instruction(ctx, &newInst);
261
262 #if NORMALIZE /* OPTIONAL normalization of length */
263 /* RSQ t0.x, t0.x; */
264 newInst = tgsi_default_full_instruction();
265 newInst.Instruction.Opcode = TGSI_OPCODE_RSQ;
266 newInst.Instruction.NumDstRegs = 1;
267 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
268 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
269 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
270 newInst.Instruction.NumSrcRegs = 1;
271 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
272 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
273 ctx->emit_instruction(ctx, &newInst);
274
275 /* RCP t0.x, t0.x; */
276 newInst = tgsi_default_full_instruction();
277 newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
278 newInst.Instruction.NumDstRegs = 1;
279 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
280 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
281 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
282 newInst.Instruction.NumSrcRegs = 1;
283 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
284 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
285 ctx->emit_instruction(ctx, &newInst);
286 #endif
287
288 /* SGT t0.y, t0.xxxx, t0.wwww; # bool b = d > 1 (NOTE t0.w == 1) */
289 newInst = tgsi_default_full_instruction();
290 newInst.Instruction.Opcode = TGSI_OPCODE_SGT;
291 newInst.Instruction.NumDstRegs = 1;
292 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
293 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
294 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
295 newInst.Instruction.NumSrcRegs = 2;
296 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
297 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
298 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
299 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
300 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
301 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
302 ctx->emit_instruction(ctx, &newInst);
303
304 /* KIL -tmp0.yyyy; # if -tmp0.y < 0, KILL */
305 newInst = tgsi_default_full_instruction();
306 newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
307 newInst.Instruction.NumDstRegs = 0;
308 newInst.Instruction.NumSrcRegs = 1;
309 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
310 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
311 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
312 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
313 newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
314 newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
315 newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
316 ctx->emit_instruction(ctx, &newInst);
317
318
319 /* compute coverage factor = (1-d)/(1-k) */
320
321 /* SUB t0.z, tex.w, tex.z; # m = 1 - k */
322 newInst = tgsi_default_full_instruction();
323 newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
324 newInst.Instruction.NumDstRegs = 1;
325 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
326 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
327 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
328 newInst.Instruction.NumSrcRegs = 2;
329 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
330 newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
331 newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
332 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
333 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
334 newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
335 ctx->emit_instruction(ctx, &newInst);
336
337 /* RCP t0.z, t0.z; # t0.z = 1 / m */
338 newInst = tgsi_default_full_instruction();
339 newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
340 newInst.Instruction.NumDstRegs = 1;
341 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
342 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
343 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
344 newInst.Instruction.NumSrcRegs = 1;
345 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
346 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
347 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z;
348 ctx->emit_instruction(ctx, &newInst);
349
350 /* SUB t0.y, 1, t0.x; # d = 1 - d */
351 newInst = tgsi_default_full_instruction();
352 newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
353 newInst.Instruction.NumDstRegs = 1;
354 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
355 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
356 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
357 newInst.Instruction.NumSrcRegs = 2;
358 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
359 newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
360 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
361 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
362 newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
363 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
364 ctx->emit_instruction(ctx, &newInst);
365
366 /* MUL t0.w, t0.y, t0.z; # coverage = d * m */
367 newInst = tgsi_default_full_instruction();
368 newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
369 newInst.Instruction.NumDstRegs = 1;
370 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
371 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
372 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
373 newInst.Instruction.NumSrcRegs = 2;
374 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
375 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
376 newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
377 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
378 newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
379 newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z;
380 ctx->emit_instruction(ctx, &newInst);
381
382 /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */
383 newInst = tgsi_default_full_instruction();
384 newInst.Instruction.Opcode = TGSI_OPCODE_SLE;
385 newInst.Instruction.NumDstRegs = 1;
386 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
387 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
388 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
389 newInst.Instruction.NumSrcRegs = 2;
390 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
391 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
392 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
393 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
394 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
395 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z;
396 ctx->emit_instruction(ctx, &newInst);
397
398 /* CMP t0.w, -t0.y, tex.w, t0.w;
399 * # if -t0.y < 0 then
400 * t0.w = 1
401 * else
402 * t0.w = t0.w
403 */
404 newInst = tgsi_default_full_instruction();
405 newInst.Instruction.Opcode = TGSI_OPCODE_CMP;
406 newInst.Instruction.NumDstRegs = 1;
407 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
408 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
409 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
410 newInst.Instruction.NumSrcRegs = 3;
411 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
412 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
413 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
414 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
415 newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
416 newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
417 newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
418 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
419 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
420 newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
421 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
422 newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
423 newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
424 newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
425 newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0;
426 newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
427 newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
428 newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
429 newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
430 ctx->emit_instruction(ctx, &newInst);
431
432 }
433
434 if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
435 /* add alpha modulation code at tail of program */
436
437 /* MOV result.color.xyz, colorTemp; */
438 newInst = tgsi_default_full_instruction();
439 newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
440 newInst.Instruction.NumDstRegs = 1;
441 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
442 newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
443 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ;
444 newInst.Instruction.NumSrcRegs = 1;
445 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
446 newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
447 ctx->emit_instruction(ctx, &newInst);
448
449 /* MUL result.color.w, colorTemp, tmp0.w; */
450 newInst = tgsi_default_full_instruction();
451 newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
452 newInst.Instruction.NumDstRegs = 1;
453 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
454 newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
455 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
456 newInst.Instruction.NumSrcRegs = 2;
457 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
458 newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
459 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
460 newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0;
461 ctx->emit_instruction(ctx, &newInst);
462 }
463 else {
464 /* Not an END instruction.
465 * Look for writes to result.color and replace with colorTemp reg.
466 */
467 uint i;
468
469 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
470 struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
471 if (dst->DstRegister.File == TGSI_FILE_OUTPUT &&
472 dst->DstRegister.Index == aactx->colorOutput) {
473 dst->DstRegister.File = TGSI_FILE_TEMPORARY;
474 dst->DstRegister.Index = aactx->colorTemp;
475 }
476 }
477 }
478
479 ctx->emit_instruction(ctx, inst);
480 }
481
482
483 /**
484 * Generate the frag shader we'll use for drawing AA points.
485 * This will be the user's shader plus some texture/modulate instructions.
486 */
487 static boolean
488 generate_aapoint_fs(struct aapoint_stage *aapoint)
489 {
490 const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
491 struct pipe_shader_state aapoint_fs;
492 struct aa_transform_context transform;
493
494 #define MAX 1000
495
496 aapoint_fs = *orig_fs; /* copy to init */
497 aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
498 if (aapoint_fs.tokens == NULL)
499 return FALSE;
500
501 memset(&transform, 0, sizeof(transform));
502 transform.colorOutput = -1;
503 transform.maxInput = -1;
504 transform.maxGeneric = -1;
505 transform.colorTemp = -1;
506 transform.tmp0 = -1;
507 transform.firstInstruction = TRUE;
508 transform.base.transform_instruction = aa_transform_inst;
509 transform.base.transform_declaration = aa_transform_decl;
510
511 tgsi_transform_shader(orig_fs->tokens,
512 (struct tgsi_token *) aapoint_fs.tokens,
513 MAX, &transform.base);
514
515 #if 0 /* DEBUG */
516 printf("draw_aapoint, orig shader:\n");
517 tgsi_dump(orig_fs->tokens, 0);
518 printf("draw_aapoint, new shader:\n");
519 tgsi_dump(aapoint_fs.tokens, 0);
520 #endif
521
522 aapoint->fs->aapoint_fs
523 = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs);
524 if (aapoint->fs->aapoint_fs == NULL)
525 return FALSE;
526
527 aapoint->fs->generic_attrib = transform.maxGeneric + 1;
528
529 return TRUE;
530 }
531
532
533 /**
534 * When we're about to draw our first AA point in a batch, this function is
535 * called to tell the driver to bind our modified fragment shader.
536 */
537 static boolean
538 bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
539 {
540 struct draw_context *draw = aapoint->stage.draw;
541
542 if (!aapoint->fs->aapoint_fs &&
543 !generate_aapoint_fs(aapoint))
544 return FALSE;
545
546 draw->suspend_flushing = TRUE;
547 aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs);
548 draw->suspend_flushing = FALSE;
549
550 return TRUE;
551 }
552
553
554
555 static INLINE struct aapoint_stage *
556 aapoint_stage( struct draw_stage *stage )
557 {
558 return (struct aapoint_stage *) stage;
559 }
560
561
562
563
564 /**
565 * Draw an AA point by drawing a quad.
566 */
567 static void
568 aapoint_point(struct draw_stage *stage, struct prim_header *header)
569 {
570 const struct aapoint_stage *aapoint = aapoint_stage(stage);
571 struct prim_header tri;
572 struct vertex_header *v[4];
573 uint texPos = aapoint->tex_slot;
574 uint pos_slot = aapoint->pos_slot;
575 float radius, *pos, *tex;
576 uint i;
577 float k;
578
579 if (aapoint->psize_slot >= 0) {
580 radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
581 }
582 else {
583 radius = aapoint->radius;
584 }
585
586 /*
587 * Note: the texcoords (generic attrib, really) we use are special:
588 * The S and T components simply vary from -1 to +1.
589 * The R component is k, below.
590 * The Q component is 1.0 and will used as a handy constant in the
591 * fragment shader.
592 */
593
594 /*
595 * k is the threshold distance from the point's center at which
596 * we begin alpha attenuation (the coverage value).
597 * Operating within a unit circle, we'll compute the fragment's
598 * distance 'd' from the center point using the texcoords.
599 * IF d > 1.0 THEN
600 * KILL fragment
601 * ELSE IF d > k THEN
602 * compute coverage in [0,1] proportional to d in [k, 1].
603 * ELSE
604 * coverage = 1.0; // full coverage
605 * ENDIF
606 *
607 * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
608 * avoid using IF/ELSE/ENDIF TGSI opcodes.
609 */
610
611 #if !NORMALIZE
612 k = 1.0f / radius;
613 k = 1.0f - 2.0f * k + k * k;
614 #else
615 k = 1.0f - 1.0f / radius;
616 #endif
617
618 /* allocate/dup new verts */
619 for (i = 0; i < 4; i++) {
620 v[i] = dup_vert(stage, header->v[0], i);
621 }
622
623 /* new verts */
624 pos = v[0]->data[pos_slot];
625 pos[0] -= radius;
626 pos[1] -= radius;
627
628 pos = v[1]->data[pos_slot];
629 pos[0] += radius;
630 pos[1] -= radius;
631
632 pos = v[2]->data[pos_slot];
633 pos[0] += radius;
634 pos[1] += radius;
635
636 pos = v[3]->data[pos_slot];
637 pos[0] -= radius;
638 pos[1] += radius;
639
640 /* new texcoords */
641 tex = v[0]->data[texPos];
642 ASSIGN_4V(tex, -1, -1, k, 1);
643
644 tex = v[1]->data[texPos];
645 ASSIGN_4V(tex, 1, -1, k, 1);
646
647 tex = v[2]->data[texPos];
648 ASSIGN_4V(tex, 1, 1, k, 1);
649
650 tex = v[3]->data[texPos];
651 ASSIGN_4V(tex, -1, 1, k, 1);
652
653 /* emit 2 tris for the quad strip */
654 tri.v[0] = v[0];
655 tri.v[1] = v[1];
656 tri.v[2] = v[2];
657 stage->next->tri( stage->next, &tri );
658
659 tri.v[0] = v[0];
660 tri.v[1] = v[2];
661 tri.v[2] = v[3];
662 stage->next->tri( stage->next, &tri );
663 }
664
665
666 static void
667 aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
668 {
669 auto struct aapoint_stage *aapoint = aapoint_stage(stage);
670 struct draw_context *draw = stage->draw;
671
672 assert(draw->rasterizer->point_smooth);
673
674 if (draw->rasterizer->point_size <= 2.0)
675 aapoint->radius = 1.0;
676 else
677 aapoint->radius = 0.5f * draw->rasterizer->point_size;
678
679 /*
680 * Bind (generate) our fragprog.
681 */
682 bind_aapoint_fragment_shader(aapoint);
683
684 /* update vertex attrib info */
685 aapoint->tex_slot = draw->vs.num_vs_outputs;
686 assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
687
688 aapoint->pos_slot = draw->vs.position_output;
689
690 draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
691 draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib;
692 draw->extra_vp_outputs.slot = aapoint->tex_slot;
693
694 /* find psize slot in post-transform vertex */
695 aapoint->psize_slot = -1;
696 if (draw->rasterizer->point_size_per_vertex) {
697 /* find PSIZ vertex output */
698 const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
699 uint i;
700 for (i = 0; i < vs->info.num_outputs; i++) {
701 if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
702 aapoint->psize_slot = i;
703 break;
704 }
705 }
706 }
707
708 /* now really draw first point */
709 stage->point = aapoint_point;
710 stage->point(stage, header);
711 }
712
713
714 static void
715 aapoint_flush(struct draw_stage *stage, unsigned flags)
716 {
717 struct draw_context *draw = stage->draw;
718 struct aapoint_stage *aapoint = aapoint_stage(stage);
719 struct pipe_context *pipe = aapoint->pipe;
720
721 stage->point = aapoint_first_point;
722 stage->next->flush( stage->next, flags );
723
724 /* restore original frag shader */
725 draw->suspend_flushing = TRUE;
726 aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs);
727 draw->suspend_flushing = FALSE;
728
729 draw->extra_vp_outputs.slot = 0;
730 }
731
732
733 static void
734 aapoint_reset_stipple_counter(struct draw_stage *stage)
735 {
736 stage->next->reset_stipple_counter( stage->next );
737 }
738
739
/**
 * Destroy the stage: release its temporary vertices, then the stage itself.
 */
static void
aapoint_destroy(struct draw_stage *stage)
{
   /* temp verts first -- they hang off the stage freed below */
   draw_free_temp_verts( stage );

   FREE( stage );
}
746
747
748 static struct aapoint_stage *
749 draw_aapoint_stage(struct draw_context *draw)
750 {
751 struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
752 if (aapoint == NULL)
753 goto fail;
754
755 if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
756 goto fail;
757
758 aapoint->stage.draw = draw;
759 aapoint->stage.next = NULL;
760 aapoint->stage.point = aapoint_first_point;
761 aapoint->stage.line = draw_pipe_passthrough_line;
762 aapoint->stage.tri = draw_pipe_passthrough_tri;
763 aapoint->stage.flush = aapoint_flush;
764 aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
765 aapoint->stage.destroy = aapoint_destroy;
766
767 return aapoint;
768
769 fail:
770 if (aapoint)
771 aapoint_destroy(&aapoint->stage);
772
773 return NULL;
774
775 }
776
777
778 static struct aapoint_stage *
779 aapoint_stage_from_pipe(struct pipe_context *pipe)
780 {
781 struct draw_context *draw = (struct draw_context *) pipe->draw;
782 return aapoint_stage(draw->pipeline.aapoint);
783 }
784
785
786 /**
787 * This function overrides the driver's create_fs_state() function and
788 * will typically be called by the state tracker.
789 */
790 static void *
791 aapoint_create_fs_state(struct pipe_context *pipe,
792 const struct pipe_shader_state *fs)
793 {
794 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
795 struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
796 if (aafs == NULL)
797 return NULL;
798
799 aafs->state = *fs;
800
801 /* pass-through */
802 aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs);
803
804 return aafs;
805 }
806
807
808 static void
809 aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
810 {
811 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
812 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
813 /* save current */
814 aapoint->fs = aafs;
815 /* pass-through */
816 aapoint->driver_bind_fs_state(aapoint->pipe,
817 (aafs ? aafs->driver_fs : NULL));
818 }
819
820
821 static void
822 aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
823 {
824 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
825 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
826 /* pass-through */
827 aapoint->driver_delete_fs_state(aapoint->pipe, aafs->driver_fs);
828 FREE(aafs);
829 }
830
831
832 /**
833 * Called by drivers that want to install this AA point prim stage
834 * into the draw module's pipeline. This will not be used if the
835 * hardware has native support for AA points.
836 */
837 boolean
838 draw_install_aapoint_stage(struct draw_context *draw,
839 struct pipe_context *pipe)
840 {
841 struct aapoint_stage *aapoint;
842
843 pipe->draw = (void *) draw;
844
845 /*
846 * Create / install AA point drawing / prim stage
847 */
848 aapoint = draw_aapoint_stage( draw );
849 if (aapoint == NULL)
850 goto fail;
851
852 aapoint->pipe = pipe;
853
854 /* save original driver functions */
855 aapoint->driver_create_fs_state = pipe->create_fs_state;
856 aapoint->driver_bind_fs_state = pipe->bind_fs_state;
857 aapoint->driver_delete_fs_state = pipe->delete_fs_state;
858
859 /* override the driver's functions */
860 pipe->create_fs_state = aapoint_create_fs_state;
861 pipe->bind_fs_state = aapoint_bind_fs_state;
862 pipe->delete_fs_state = aapoint_delete_fs_state;
863
864 draw->pipeline.aapoint = &aapoint->stage;
865
866 return TRUE;
867
868 fail:
869 if (aapoint)
870 aapoint->stage.destroy( &aapoint->stage );
871
872 return FALSE;
873 }