Merge remote branch 'origin/master' into radeon-rewrite
[mesa.git] / src / gallium / auxiliary / draw / draw_pipe_aapoint.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * AA point stage: AA points are converted to quads and rendered with a
30 * special fragment shader. Another approach would be to use a texture
31 * map image of a point, but experiments indicate the quality isn't nearly
32 * as good as this approach.
33 *
34 * Note: this looks a lot like draw_aaline.c but there's actually little
35 * if any code that can be shared.
36 *
37 * Authors: Brian Paul
38 */
39
40
41 #include "pipe/p_context.h"
42 #include "pipe/p_defines.h"
43 #include "pipe/p_shader_tokens.h"
44
45 #include "tgsi/tgsi_transform.h"
46 #include "tgsi/tgsi_dump.h"
47
48 #include "util/u_math.h"
49 #include "util/u_memory.h"
50
51 #include "draw_context.h"
52 #include "draw_vs.h"
53 #include "draw_pipe.h"
54
55
56 /*
57 * Enabling NORMALIZE might give _slightly_ better results.
58 * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
59 * d=x*x+y*y. Since we're working with a unit circle, the latter seems
60 * close enough and saves some costly instructions.
61 */
62 #define NORMALIZE 0
63
64
65 /**
66 * Subclass of pipe_shader_state to carry extra fragment shader info.
67 */
68 struct aapoint_fragment_shader
69 {
70 struct pipe_shader_state state;
71 void *driver_fs; /**< the regular shader */
72 void *aapoint_fs; /**< the aa point-augmented shader */
73 int generic_attrib; /**< The generic input attrib/texcoord we'll use */
74 };
75
76
77 /**
78 * Subclass of draw_stage
79 */
80 struct aapoint_stage
81 {
82 struct draw_stage stage;
83
84 int psize_slot;
85 float radius;
86
87 /** this is the vertex attrib slot for the new texcoords */
88 uint tex_slot;
89 uint pos_slot;
90
91 /*
92 * Currently bound state
93 */
94 struct aapoint_fragment_shader *fs;
95
96 /*
97 * Driver interface/override functions
98 */
99 void * (*driver_create_fs_state)(struct pipe_context *,
100 const struct pipe_shader_state *);
101 void (*driver_bind_fs_state)(struct pipe_context *, void *);
102 void (*driver_delete_fs_state)(struct pipe_context *, void *);
103
104 struct pipe_context *pipe;
105 };
106
107
108
109 /**
110 * Subclass of tgsi_transform_context, used for transforming the
111 * user's fragment shader to add the special AA instructions.
112 */
113 struct aa_transform_context {
114 struct tgsi_transform_context base;
115 uint tempsUsed; /**< bitmask */
116 int colorOutput; /**< which output is the primary color */
117 int maxInput, maxGeneric; /**< max input index found */
118 int tmp0, colorTemp; /**< temp registers */
119 boolean firstInstruction;
120 };
121
122
123 /**
124 * TGSI declaration transform callback.
125 * Look for two free temp regs and available input reg for new texcoords.
126 */
127 static void
128 aa_transform_decl(struct tgsi_transform_context *ctx,
129 struct tgsi_full_declaration *decl)
130 {
131 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
132
133 if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
134 decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR &&
135 decl->Semantic.SemanticIndex == 0) {
136 aactx->colorOutput = decl->DeclarationRange.First;
137 }
138 else if (decl->Declaration.File == TGSI_FILE_INPUT) {
139 if ((int) decl->DeclarationRange.Last > aactx->maxInput)
140 aactx->maxInput = decl->DeclarationRange.Last;
141 if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC &&
142 (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) {
143 aactx->maxGeneric = decl->Semantic.SemanticIndex;
144 }
145 }
146 else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
147 uint i;
148 for (i = decl->DeclarationRange.First;
149 i <= decl->DeclarationRange.Last; i++) {
150 aactx->tempsUsed |= (1 << i);
151 }
152 }
153
154 ctx->emit_declaration(ctx, decl);
155 }
156
157
158 /**
159 * TGSI instruction transform callback.
160 * Replace writes to result.color w/ a temp reg.
161 * Upon END instruction, insert texture sampling code for antialiasing.
162 */
163 static void
164 aa_transform_inst(struct tgsi_transform_context *ctx,
165 struct tgsi_full_instruction *inst)
166 {
167 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
168 struct tgsi_full_instruction newInst;
169
170 if (aactx->firstInstruction) {
171 /* emit our new declarations before the first instruction */
172
173 struct tgsi_full_declaration decl;
174 const int texInput = aactx->maxInput + 1;
175 int tmp0;
176 uint i;
177
178 /* find two free temp regs */
179 for (i = 0; i < 32; i++) {
180 if ((aactx->tempsUsed & (1 << i)) == 0) {
181 /* found a free temp */
182 if (aactx->tmp0 < 0)
183 aactx->tmp0 = i;
184 else if (aactx->colorTemp < 0)
185 aactx->colorTemp = i;
186 else
187 break;
188 }
189 }
190
191 assert(aactx->colorTemp != aactx->tmp0);
192
193 tmp0 = aactx->tmp0;
194
195 /* declare new generic input/texcoord */
196 decl = tgsi_default_full_declaration();
197 decl.Declaration.File = TGSI_FILE_INPUT;
198 /* XXX this could be linear... */
199 decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
200 decl.Declaration.Semantic = 1;
201 decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
202 decl.Semantic.SemanticIndex = aactx->maxGeneric + 1;
203 decl.DeclarationRange.First =
204 decl.DeclarationRange.Last = texInput;
205 ctx->emit_declaration(ctx, &decl);
206
207 /* declare new temp regs */
208 decl = tgsi_default_full_declaration();
209 decl.Declaration.File = TGSI_FILE_TEMPORARY;
210 decl.DeclarationRange.First =
211 decl.DeclarationRange.Last = tmp0;
212 ctx->emit_declaration(ctx, &decl);
213
214 decl = tgsi_default_full_declaration();
215 decl.Declaration.File = TGSI_FILE_TEMPORARY;
216 decl.DeclarationRange.First =
217 decl.DeclarationRange.Last = aactx->colorTemp;
218 ctx->emit_declaration(ctx, &decl);
219
220 aactx->firstInstruction = FALSE;
221
222
223 /*
224 * Emit code to compute fragment coverage, kill if outside point radius
225 *
226 * Temp reg0 usage:
227 * t0.x = distance of fragment from center point
228 * t0.y = boolean, is t0.x > 1.0, also misc temp usage
229 * t0.z = temporary for computing 1/(1-k) value
230 * t0.w = final coverage value
231 */
232
233 /* MUL t0.xy, tex, tex; # compute x^2, y^2 */
234 newInst = tgsi_default_full_instruction();
235 newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
236 newInst.Instruction.NumDstRegs = 1;
237 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
238 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
239 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY;
240 newInst.Instruction.NumSrcRegs = 2;
241 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
242 newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
243 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
244 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
245 ctx->emit_instruction(ctx, &newInst);
246
247 /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */
248 newInst = tgsi_default_full_instruction();
249 newInst.Instruction.Opcode = TGSI_OPCODE_ADD;
250 newInst.Instruction.NumDstRegs = 1;
251 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
252 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
253 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
254 newInst.Instruction.NumSrcRegs = 2;
255 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
256 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
257 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
258 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
259 newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
260 newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
261 ctx->emit_instruction(ctx, &newInst);
262
263 #if NORMALIZE /* OPTIONAL normalization of length */
264 /* RSQ t0.x, t0.x; */
265 newInst = tgsi_default_full_instruction();
266 newInst.Instruction.Opcode = TGSI_OPCODE_RSQ;
267 newInst.Instruction.NumDstRegs = 1;
268 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
269 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
270 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
271 newInst.Instruction.NumSrcRegs = 1;
272 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
273 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
274 ctx->emit_instruction(ctx, &newInst);
275
276 /* RCP t0.x, t0.x; */
277 newInst = tgsi_default_full_instruction();
278 newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
279 newInst.Instruction.NumDstRegs = 1;
280 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
281 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
282 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
283 newInst.Instruction.NumSrcRegs = 1;
284 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
285 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
286 ctx->emit_instruction(ctx, &newInst);
287 #endif
288
289 /* SGT t0.y, t0.xxxx, tex.wwww; # bool b = d > 1 (NOTE tex.w == 1) */
290 newInst = tgsi_default_full_instruction();
291 newInst.Instruction.Opcode = TGSI_OPCODE_SGT;
292 newInst.Instruction.NumDstRegs = 1;
293 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
294 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
295 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
296 newInst.Instruction.NumSrcRegs = 2;
297 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
298 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
299 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
300 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
301 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
302 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
303 ctx->emit_instruction(ctx, &newInst);
304
305 /* KIL -tmp0.yyyy; # if -tmp0.y < 0, KILL */
306 newInst = tgsi_default_full_instruction();
307 newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
308 newInst.Instruction.NumDstRegs = 0;
309 newInst.Instruction.NumSrcRegs = 1;
310 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
311 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
312 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
313 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
314 newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
315 newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
316 newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
317 ctx->emit_instruction(ctx, &newInst);
318
319
320 /* compute coverage factor = (1-d)/(1-k) */
321
322 /* SUB t0.z, tex.w, tex.z; # m = 1 - k */
323 newInst = tgsi_default_full_instruction();
324 newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
325 newInst.Instruction.NumDstRegs = 1;
326 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
327 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
328 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
329 newInst.Instruction.NumSrcRegs = 2;
330 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
331 newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
332 newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
333 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
334 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
335 newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
336 ctx->emit_instruction(ctx, &newInst);
337
338 /* RCP t0.z, t0.z; # t0.z = 1 / m */
339 newInst = tgsi_default_full_instruction();
340 newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
341 newInst.Instruction.NumDstRegs = 1;
342 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
343 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
344 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
345 newInst.Instruction.NumSrcRegs = 1;
346 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
347 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
348 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z;
349 ctx->emit_instruction(ctx, &newInst);
350
351 /* SUB t0.y, 1, t0.x; # d = 1 - d */
352 newInst = tgsi_default_full_instruction();
353 newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
354 newInst.Instruction.NumDstRegs = 1;
355 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
356 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
357 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
358 newInst.Instruction.NumSrcRegs = 2;
359 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
360 newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
361 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
362 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
363 newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
364 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
365 ctx->emit_instruction(ctx, &newInst);
366
367 /* MUL t0.w, t0.y, t0.z; # coverage = d * m */
368 newInst = tgsi_default_full_instruction();
369 newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
370 newInst.Instruction.NumDstRegs = 1;
371 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
372 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
373 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
374 newInst.Instruction.NumSrcRegs = 2;
375 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
376 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
377 newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
378 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
379 newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
380 newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z;
381 ctx->emit_instruction(ctx, &newInst);
382
383 /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */
384 newInst = tgsi_default_full_instruction();
385 newInst.Instruction.Opcode = TGSI_OPCODE_SLE;
386 newInst.Instruction.NumDstRegs = 1;
387 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
388 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
389 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
390 newInst.Instruction.NumSrcRegs = 2;
391 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
392 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
393 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
394 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
395 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
396 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z;
397 ctx->emit_instruction(ctx, &newInst);
398
399 /* CMP t0.w, -t0.y, tex.w, t0.w;
400 * # if -t0.y < 0 then
401 * t0.w = 1
402 * else
403 * t0.w = t0.w
404 */
405 newInst = tgsi_default_full_instruction();
406 newInst.Instruction.Opcode = TGSI_OPCODE_CMP;
407 newInst.Instruction.NumDstRegs = 1;
408 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
409 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
410 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
411 newInst.Instruction.NumSrcRegs = 3;
412 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
413 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
414 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
415 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
416 newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
417 newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
418 newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
419 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
420 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
421 newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
422 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
423 newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
424 newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
425 newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
426 newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0;
427 newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
428 newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
429 newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
430 newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
431 ctx->emit_instruction(ctx, &newInst);
432
433 }
434
435 if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
436 /* add alpha modulation code at tail of program */
437
438 /* MOV result.color.xyz, colorTemp; */
439 newInst = tgsi_default_full_instruction();
440 newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
441 newInst.Instruction.NumDstRegs = 1;
442 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
443 newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
444 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ;
445 newInst.Instruction.NumSrcRegs = 1;
446 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
447 newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
448 ctx->emit_instruction(ctx, &newInst);
449
450 /* MUL result.color.w, colorTemp, tmp0.w; */
451 newInst = tgsi_default_full_instruction();
452 newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
453 newInst.Instruction.NumDstRegs = 1;
454 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
455 newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
456 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
457 newInst.Instruction.NumSrcRegs = 2;
458 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
459 newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
460 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
461 newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0;
462 ctx->emit_instruction(ctx, &newInst);
463 }
464 else {
465 /* Not an END instruction.
466 * Look for writes to result.color and replace with colorTemp reg.
467 */
468 uint i;
469
470 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
471 struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
472 if (dst->DstRegister.File == TGSI_FILE_OUTPUT &&
473 dst->DstRegister.Index == aactx->colorOutput) {
474 dst->DstRegister.File = TGSI_FILE_TEMPORARY;
475 dst->DstRegister.Index = aactx->colorTemp;
476 }
477 }
478 }
479
480 ctx->emit_instruction(ctx, inst);
481 }
482
483
484 /**
485 * Generate the frag shader we'll use for drawing AA points.
486 * This will be the user's shader plus some texture/modulate instructions.
487 */
488 static boolean
489 generate_aapoint_fs(struct aapoint_stage *aapoint)
490 {
491 const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
492 struct pipe_shader_state aapoint_fs;
493 struct aa_transform_context transform;
494
495 #define MAX 1000
496
497 aapoint_fs = *orig_fs; /* copy to init */
498 aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
499 if (aapoint_fs.tokens == NULL)
500 return FALSE;
501
502 memset(&transform, 0, sizeof(transform));
503 transform.colorOutput = -1;
504 transform.maxInput = -1;
505 transform.maxGeneric = -1;
506 transform.colorTemp = -1;
507 transform.tmp0 = -1;
508 transform.firstInstruction = TRUE;
509 transform.base.transform_instruction = aa_transform_inst;
510 transform.base.transform_declaration = aa_transform_decl;
511
512 tgsi_transform_shader(orig_fs->tokens,
513 (struct tgsi_token *) aapoint_fs.tokens,
514 MAX, &transform.base);
515
516 #if 0 /* DEBUG */
517 printf("draw_aapoint, orig shader:\n");
518 tgsi_dump(orig_fs->tokens, 0);
519 printf("draw_aapoint, new shader:\n");
520 tgsi_dump(aapoint_fs.tokens, 0);
521 #endif
522
523 aapoint->fs->aapoint_fs
524 = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs);
525 if (aapoint->fs->aapoint_fs == NULL)
526 return FALSE;
527
528 aapoint->fs->generic_attrib = transform.maxGeneric + 1;
529
530 return TRUE;
531 }
532
533
534 /**
535 * When we're about to draw our first AA point in a batch, this function is
536 * called to tell the driver to bind our modified fragment shader.
537 */
538 static boolean
539 bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
540 {
541 struct draw_context *draw = aapoint->stage.draw;
542
543 if (!aapoint->fs->aapoint_fs &&
544 !generate_aapoint_fs(aapoint))
545 return FALSE;
546
547 draw->suspend_flushing = TRUE;
548 aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs);
549 draw->suspend_flushing = FALSE;
550
551 return TRUE;
552 }
553
554
555
556 static INLINE struct aapoint_stage *
557 aapoint_stage( struct draw_stage *stage )
558 {
559 return (struct aapoint_stage *) stage;
560 }
561
562
563
564
565 /**
566 * Draw an AA point by drawing a quad.
567 */
568 static void
569 aapoint_point(struct draw_stage *stage, struct prim_header *header)
570 {
571 const struct aapoint_stage *aapoint = aapoint_stage(stage);
572 struct prim_header tri;
573 struct vertex_header *v[4];
574 uint texPos = aapoint->tex_slot;
575 uint pos_slot = aapoint->pos_slot;
576 float radius, *pos, *tex;
577 uint i;
578 float k;
579
580 if (aapoint->psize_slot >= 0) {
581 radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
582 }
583 else {
584 radius = aapoint->radius;
585 }
586
587 /*
588 * Note: the texcoords (generic attrib, really) we use are special:
589 * The S and T components simply vary from -1 to +1.
590 * The R component is k, below.
591 * The Q component is 1.0 and will used as a handy constant in the
592 * fragment shader.
593 */
594
595 /*
596 * k is the threshold distance from the point's center at which
597 * we begin alpha attenuation (the coverage value).
598 * Operating within a unit circle, we'll compute the fragment's
599 * distance 'd' from the center point using the texcoords.
600 * IF d > 1.0 THEN
601 * KILL fragment
602 * ELSE IF d > k THEN
603 * compute coverage in [0,1] proportional to d in [k, 1].
604 * ELSE
605 * coverage = 1.0; // full coverage
606 * ENDIF
607 *
608 * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
609 * avoid using IF/ELSE/ENDIF TGSI opcodes.
610 */
611
612 #if !NORMALIZE
613 k = 1.0f / radius;
614 k = 1.0f - 2.0f * k + k * k;
615 #else
616 k = 1.0f - 1.0f / radius;
617 #endif
618
619 /* allocate/dup new verts */
620 for (i = 0; i < 4; i++) {
621 v[i] = dup_vert(stage, header->v[0], i);
622 }
623
624 /* new verts */
625 pos = v[0]->data[pos_slot];
626 pos[0] -= radius;
627 pos[1] -= radius;
628
629 pos = v[1]->data[pos_slot];
630 pos[0] += radius;
631 pos[1] -= radius;
632
633 pos = v[2]->data[pos_slot];
634 pos[0] += radius;
635 pos[1] += radius;
636
637 pos = v[3]->data[pos_slot];
638 pos[0] -= radius;
639 pos[1] += radius;
640
641 /* new texcoords */
642 tex = v[0]->data[texPos];
643 ASSIGN_4V(tex, -1, -1, k, 1);
644
645 tex = v[1]->data[texPos];
646 ASSIGN_4V(tex, 1, -1, k, 1);
647
648 tex = v[2]->data[texPos];
649 ASSIGN_4V(tex, 1, 1, k, 1);
650
651 tex = v[3]->data[texPos];
652 ASSIGN_4V(tex, -1, 1, k, 1);
653
654 /* emit 2 tris for the quad strip */
655 tri.v[0] = v[0];
656 tri.v[1] = v[1];
657 tri.v[2] = v[2];
658 stage->next->tri( stage->next, &tri );
659
660 tri.v[0] = v[0];
661 tri.v[1] = v[2];
662 tri.v[2] = v[3];
663 stage->next->tri( stage->next, &tri );
664 }
665
666
667 static void
668 aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
669 {
670 auto struct aapoint_stage *aapoint = aapoint_stage(stage);
671 struct draw_context *draw = stage->draw;
672
673 assert(draw->rasterizer->point_smooth);
674
675 if (draw->rasterizer->point_size <= 2.0)
676 aapoint->radius = 1.0;
677 else
678 aapoint->radius = 0.5f * draw->rasterizer->point_size;
679
680 /*
681 * Bind (generate) our fragprog.
682 */
683 bind_aapoint_fragment_shader(aapoint);
684
685 /* update vertex attrib info */
686 aapoint->tex_slot = draw->vs.num_vs_outputs;
687 assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
688
689 aapoint->pos_slot = draw->vs.position_output;
690
691 draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
692 draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib;
693 draw->extra_vp_outputs.slot = aapoint->tex_slot;
694
695 /* find psize slot in post-transform vertex */
696 aapoint->psize_slot = -1;
697 if (draw->rasterizer->point_size_per_vertex) {
698 /* find PSIZ vertex output */
699 const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
700 uint i;
701 for (i = 0; i < vs->info.num_outputs; i++) {
702 if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
703 aapoint->psize_slot = i;
704 break;
705 }
706 }
707 }
708
709 /* now really draw first point */
710 stage->point = aapoint_point;
711 stage->point(stage, header);
712 }
713
714
715 static void
716 aapoint_flush(struct draw_stage *stage, unsigned flags)
717 {
718 struct draw_context *draw = stage->draw;
719 struct aapoint_stage *aapoint = aapoint_stage(stage);
720 struct pipe_context *pipe = aapoint->pipe;
721
722 stage->point = aapoint_first_point;
723 stage->next->flush( stage->next, flags );
724
725 /* restore original frag shader */
726 draw->suspend_flushing = TRUE;
727 aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs);
728 draw->suspend_flushing = FALSE;
729
730 draw->extra_vp_outputs.slot = 0;
731 }
732
733
734 static void
735 aapoint_reset_stipple_counter(struct draw_stage *stage)
736 {
737 stage->next->reset_stipple_counter( stage->next );
738 }
739
740
/**
 * Destroy callback: release the stage's temporary vertices, then the
 * stage object itself.
 */
static void
aapoint_destroy(struct draw_stage *stage)
{
   draw_free_temp_verts( stage );

   FREE( stage );
}
747
748
749 static struct aapoint_stage *
750 draw_aapoint_stage(struct draw_context *draw)
751 {
752 struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
753 if (aapoint == NULL)
754 goto fail;
755
756 if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
757 goto fail;
758
759 aapoint->stage.draw = draw;
760 aapoint->stage.next = NULL;
761 aapoint->stage.point = aapoint_first_point;
762 aapoint->stage.line = draw_pipe_passthrough_line;
763 aapoint->stage.tri = draw_pipe_passthrough_tri;
764 aapoint->stage.flush = aapoint_flush;
765 aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
766 aapoint->stage.destroy = aapoint_destroy;
767
768 return aapoint;
769
770 fail:
771 if (aapoint)
772 aapoint_destroy(&aapoint->stage);
773
774 return NULL;
775
776 }
777
778
779 static struct aapoint_stage *
780 aapoint_stage_from_pipe(struct pipe_context *pipe)
781 {
782 struct draw_context *draw = (struct draw_context *) pipe->draw;
783 return aapoint_stage(draw->pipeline.aapoint);
784 }
785
786
787 /**
788 * This function overrides the driver's create_fs_state() function and
789 * will typically be called by the state tracker.
790 */
791 static void *
792 aapoint_create_fs_state(struct pipe_context *pipe,
793 const struct pipe_shader_state *fs)
794 {
795 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
796 struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
797 if (aafs == NULL)
798 return NULL;
799
800 aafs->state = *fs;
801
802 /* pass-through */
803 aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs);
804
805 return aafs;
806 }
807
808
809 static void
810 aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
811 {
812 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
813 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
814 /* save current */
815 aapoint->fs = aafs;
816 /* pass-through */
817 aapoint->driver_bind_fs_state(aapoint->pipe,
818 (aafs ? aafs->driver_fs : NULL));
819 }
820
821
822 static void
823 aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
824 {
825 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
826 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
827 /* pass-through */
828 aapoint->driver_delete_fs_state(aapoint->pipe, aafs->driver_fs);
829 FREE(aafs);
830 }
831
832
833 /**
834 * Called by drivers that want to install this AA point prim stage
835 * into the draw module's pipeline. This will not be used if the
836 * hardware has native support for AA points.
837 */
838 boolean
839 draw_install_aapoint_stage(struct draw_context *draw,
840 struct pipe_context *pipe)
841 {
842 struct aapoint_stage *aapoint;
843
844 pipe->draw = (void *) draw;
845
846 /*
847 * Create / install AA point drawing / prim stage
848 */
849 aapoint = draw_aapoint_stage( draw );
850 if (aapoint == NULL)
851 goto fail;
852
853 aapoint->pipe = pipe;
854
855 /* save original driver functions */
856 aapoint->driver_create_fs_state = pipe->create_fs_state;
857 aapoint->driver_bind_fs_state = pipe->bind_fs_state;
858 aapoint->driver_delete_fs_state = pipe->delete_fs_state;
859
860 /* override the driver's functions */
861 pipe->create_fs_state = aapoint_create_fs_state;
862 pipe->bind_fs_state = aapoint_bind_fs_state;
863 pipe->delete_fs_state = aapoint_delete_fs_state;
864
865 draw->pipeline.aapoint = &aapoint->stage;
866
867 return TRUE;
868
869 fail:
870 if (aapoint)
871 aapoint->stage.destroy( &aapoint->stage );
872
873 return FALSE;
874 }