Merge commit 'origin/gallium-master-merge'
[mesa.git] / src / gallium / auxiliary / draw / draw_pipe_aapoint.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * AA point stage: AA points are converted to quads and rendered with a
30 * special fragment shader. Another approach would be to use a texture
31 * map image of a point, but experiments indicate the quality isn't nearly
32 * as good as this approach.
33 *
34 * Note: this looks a lot like draw_aaline.c but there's actually little
35 * if any code that can be shared.
36 *
37 * Authors: Brian Paul
38 */
39
40
41 #include "pipe/p_inlines.h"
42 #include "pipe/p_context.h"
43 #include "pipe/p_defines.h"
44 #include "pipe/p_shader_tokens.h"
45
46 #include "tgsi/tgsi_transform.h"
47 #include "tgsi/tgsi_dump.h"
48
49 #include "util/u_math.h"
50 #include "util/u_memory.h"
51
52 #include "draw_context.h"
53 #include "draw_vs.h"
54 #include "draw_pipe.h"
55
56
57 /*
58 * Enabling NORMALIZE might give _slightly_ better results.
59 * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
60 * d=x*x+y*y. Since we're working with a unit circle, the latter seems
61 * close enough and saves some costly instructions.
62 */
63 #define NORMALIZE 0
64
65
/**
 * Subclass of pipe_shader_state to carry extra fragment shader info.
 *
 * aapoint_create_fs_state() returns a pointer to one of these in place of
 * the driver's own shader handle; the driver's handle is kept in
 * driver_fs.
 */
struct aapoint_fragment_shader
{
   struct pipe_shader_state state;  /* by-value copy of the template given to create_fs_state */
   void *driver_fs;   /**< the regular shader */
   void *aapoint_fs;  /**< the aa point-augmented shader; built on first use by generate_aapoint_fs() */
   int generic_attrib;  /**< The generic input attrib/texcoord we'll use */
};
76
77
/**
 * Subclass of draw_stage
 *
 * The draw_stage must remain the first member: aapoint_stage() converts a
 * draw_stage pointer to an aapoint_stage pointer with a plain cast.
 */
struct aapoint_stage
{
   struct draw_stage stage;

   /* index of the vertex shader's PSIZE output, or -1 when per-vertex
    * point size is not in use (see aapoint_first_point) */
   int psize_slot;
   /* point radius used when psize_slot < 0; derived from the rasterizer's
    * point_size in aapoint_first_point */
   float radius;

   /** this is the vertex attrib slot for the new texcoords */
   uint tex_slot;
   /* vertex attrib slot holding the position (draw->vs.position_output) */
   uint pos_slot;

   /*
    * Currently bound state
    */
   struct aapoint_fragment_shader *fs;

   /*
    * Driver interface/override functions
    *
    * These are the driver's original pipe_context entry points, saved by
    * draw_install_aapoint_stage() before it installs the wrappers.
    */
   void * (*driver_create_fs_state)(struct pipe_context *,
                                    const struct pipe_shader_state *);
   void (*driver_bind_fs_state)(struct pipe_context *, void *);
   void (*driver_delete_fs_state)(struct pipe_context *, void *);

   /* the pipe context whose fragment-shader functions were overridden */
   struct pipe_context *pipe;
};
107
108
109
/**
 * Subclass of tgsi_transform_context, used for transforming the
 * user's fragment shader to add the special AA instructions.
 *
 * The base context must remain the first member: the transform callbacks
 * receive a tgsi_transform_context pointer and cast it to this type.
 */
struct aa_transform_context {
   struct tgsi_transform_context base;
   uint tempsUsed;  /**< bitmask */ /* bit i set => TEMP[i] is declared by the shader */
   int colorOutput; /**< which output is the primary color */ /* -1 until a COLOR[0] output is seen */
   int maxInput, maxGeneric;  /**< max input index found */ /* both start at -1 */
   int tmp0, colorTemp;  /**< temp registers */ /* -1 until allocated in aa_transform_inst */
   boolean firstInstruction;  /* TRUE until the first instruction has been processed */
};
122
123
124 /**
125 * TGSI declaration transform callback.
126 * Look for two free temp regs and available input reg for new texcoords.
127 */
128 static void
129 aa_transform_decl(struct tgsi_transform_context *ctx,
130 struct tgsi_full_declaration *decl)
131 {
132 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
133
134 if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
135 decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR &&
136 decl->Semantic.SemanticIndex == 0) {
137 aactx->colorOutput = decl->DeclarationRange.First;
138 }
139 else if (decl->Declaration.File == TGSI_FILE_INPUT) {
140 if ((int) decl->DeclarationRange.Last > aactx->maxInput)
141 aactx->maxInput = decl->DeclarationRange.Last;
142 if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC &&
143 (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) {
144 aactx->maxGeneric = decl->Semantic.SemanticIndex;
145 }
146 }
147 else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
148 uint i;
149 for (i = decl->DeclarationRange.First;
150 i <= decl->DeclarationRange.Last; i++) {
151 aactx->tempsUsed |= (1 << i);
152 }
153 }
154
155 ctx->emit_declaration(ctx, decl);
156 }
157
158
159 /**
160 * TGSI instruction transform callback.
161 * Replace writes to result.color w/ a temp reg.
162 * Upon END instruction, insert texture sampling code for antialiasing.
163 */
164 static void
165 aa_transform_inst(struct tgsi_transform_context *ctx,
166 struct tgsi_full_instruction *inst)
167 {
168 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
169 struct tgsi_full_instruction newInst;
170
171 if (aactx->firstInstruction) {
172 /* emit our new declarations before the first instruction */
173
174 struct tgsi_full_declaration decl;
175 const int texInput = aactx->maxInput + 1;
176 int tmp0;
177 uint i;
178
179 /* find two free temp regs */
180 for (i = 0; i < 32; i++) {
181 if ((aactx->tempsUsed & (1 << i)) == 0) {
182 /* found a free temp */
183 if (aactx->tmp0 < 0)
184 aactx->tmp0 = i;
185 else if (aactx->colorTemp < 0)
186 aactx->colorTemp = i;
187 else
188 break;
189 }
190 }
191
192 assert(aactx->colorTemp != aactx->tmp0);
193
194 tmp0 = aactx->tmp0;
195
196 /* declare new generic input/texcoord */
197 decl = tgsi_default_full_declaration();
198 decl.Declaration.File = TGSI_FILE_INPUT;
199 /* XXX this could be linear... */
200 decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
201 decl.Declaration.Semantic = 1;
202 decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
203 decl.Semantic.SemanticIndex = aactx->maxGeneric + 1;
204 decl.DeclarationRange.First =
205 decl.DeclarationRange.Last = texInput;
206 ctx->emit_declaration(ctx, &decl);
207
208 /* declare new temp regs */
209 decl = tgsi_default_full_declaration();
210 decl.Declaration.File = TGSI_FILE_TEMPORARY;
211 decl.DeclarationRange.First =
212 decl.DeclarationRange.Last = tmp0;
213 ctx->emit_declaration(ctx, &decl);
214
215 decl = tgsi_default_full_declaration();
216 decl.Declaration.File = TGSI_FILE_TEMPORARY;
217 decl.DeclarationRange.First =
218 decl.DeclarationRange.Last = aactx->colorTemp;
219 ctx->emit_declaration(ctx, &decl);
220
221 aactx->firstInstruction = FALSE;
222
223
224 /*
225 * Emit code to compute fragment coverage, kill if outside point radius
226 *
227 * Temp reg0 usage:
228 * t0.x = distance of fragment from center point
229 * t0.y = boolean, is t0.x > 1.0, also misc temp usage
230 * t0.z = temporary for computing 1/(1-k) value
231 * t0.w = final coverage value
232 */
233
234 /* MUL t0.xy, tex, tex; # compute x^2, y^2 */
235 newInst = tgsi_default_full_instruction();
236 newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
237 newInst.Instruction.NumDstRegs = 1;
238 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
239 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
240 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY;
241 newInst.Instruction.NumSrcRegs = 2;
242 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
243 newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
244 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
245 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
246 ctx->emit_instruction(ctx, &newInst);
247
248 /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */
249 newInst = tgsi_default_full_instruction();
250 newInst.Instruction.Opcode = TGSI_OPCODE_ADD;
251 newInst.Instruction.NumDstRegs = 1;
252 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
253 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
254 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
255 newInst.Instruction.NumSrcRegs = 2;
256 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
257 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
258 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
259 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
260 newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
261 newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
262 ctx->emit_instruction(ctx, &newInst);
263
264 #if NORMALIZE /* OPTIONAL normalization of length */
265 /* RSQ t0.x, t0.x; */
266 newInst = tgsi_default_full_instruction();
267 newInst.Instruction.Opcode = TGSI_OPCODE_RSQ;
268 newInst.Instruction.NumDstRegs = 1;
269 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
270 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
271 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
272 newInst.Instruction.NumSrcRegs = 1;
273 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
274 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
275 ctx->emit_instruction(ctx, &newInst);
276
277 /* RCP t0.x, t0.x; */
278 newInst = tgsi_default_full_instruction();
279 newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
280 newInst.Instruction.NumDstRegs = 1;
281 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
282 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
283 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
284 newInst.Instruction.NumSrcRegs = 1;
285 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
286 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
287 ctx->emit_instruction(ctx, &newInst);
288 #endif
289
290 /* SGT t0.y, t0.xxxx, t0.wwww; # bool b = d > 1 (NOTE t0.w == 1) */
291 newInst = tgsi_default_full_instruction();
292 newInst.Instruction.Opcode = TGSI_OPCODE_SGT;
293 newInst.Instruction.NumDstRegs = 1;
294 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
295 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
296 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
297 newInst.Instruction.NumSrcRegs = 2;
298 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
299 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
300 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
301 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
302 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
303 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
304 ctx->emit_instruction(ctx, &newInst);
305
306 /* KIL -tmp0.yyyy; # if -tmp0.y < 0, KILL */
307 newInst = tgsi_default_full_instruction();
308 newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
309 newInst.Instruction.NumDstRegs = 0;
310 newInst.Instruction.NumSrcRegs = 1;
311 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
312 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
313 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
314 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
315 newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
316 newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
317 newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
318 ctx->emit_instruction(ctx, &newInst);
319
320
321 /* compute coverage factor = (1-d)/(1-k) */
322
323 /* SUB t0.z, tex.w, tex.z; # m = 1 - k */
324 newInst = tgsi_default_full_instruction();
325 newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
326 newInst.Instruction.NumDstRegs = 1;
327 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
328 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
329 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
330 newInst.Instruction.NumSrcRegs = 2;
331 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
332 newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
333 newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
334 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
335 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
336 newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
337 ctx->emit_instruction(ctx, &newInst);
338
339 /* RCP t0.z, t0.z; # t0.z = 1 / m */
340 newInst = tgsi_default_full_instruction();
341 newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
342 newInst.Instruction.NumDstRegs = 1;
343 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
344 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
345 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
346 newInst.Instruction.NumSrcRegs = 1;
347 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
348 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
349 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z;
350 ctx->emit_instruction(ctx, &newInst);
351
352 /* SUB t0.y, 1, t0.x; # d = 1 - d */
353 newInst = tgsi_default_full_instruction();
354 newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
355 newInst.Instruction.NumDstRegs = 1;
356 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
357 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
358 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
359 newInst.Instruction.NumSrcRegs = 2;
360 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
361 newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
362 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
363 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
364 newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
365 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
366 ctx->emit_instruction(ctx, &newInst);
367
368 /* MUL t0.w, t0.y, t0.z; # coverage = d * m */
369 newInst = tgsi_default_full_instruction();
370 newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
371 newInst.Instruction.NumDstRegs = 1;
372 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
373 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
374 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
375 newInst.Instruction.NumSrcRegs = 2;
376 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
377 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
378 newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
379 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
380 newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
381 newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z;
382 ctx->emit_instruction(ctx, &newInst);
383
384 /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */
385 newInst = tgsi_default_full_instruction();
386 newInst.Instruction.Opcode = TGSI_OPCODE_SLE;
387 newInst.Instruction.NumDstRegs = 1;
388 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
389 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
390 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
391 newInst.Instruction.NumSrcRegs = 2;
392 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
393 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
394 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
395 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
396 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
397 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z;
398 ctx->emit_instruction(ctx, &newInst);
399
400 /* CMP t0.w, -t0.y, tex.w, t0.w;
401 * # if -t0.y < 0 then
402 * t0.w = 1
403 * else
404 * t0.w = t0.w
405 */
406 newInst = tgsi_default_full_instruction();
407 newInst.Instruction.Opcode = TGSI_OPCODE_CMP;
408 newInst.Instruction.NumDstRegs = 1;
409 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
410 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
411 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
412 newInst.Instruction.NumSrcRegs = 3;
413 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
414 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
415 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
416 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
417 newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
418 newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
419 newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
420 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
421 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
422 newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
423 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
424 newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
425 newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
426 newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
427 newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0;
428 newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
429 newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
430 newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
431 newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
432 ctx->emit_instruction(ctx, &newInst);
433
434 }
435
436 if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
437 /* add alpha modulation code at tail of program */
438
439 /* MOV result.color.xyz, colorTemp; */
440 newInst = tgsi_default_full_instruction();
441 newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
442 newInst.Instruction.NumDstRegs = 1;
443 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
444 newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
445 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ;
446 newInst.Instruction.NumSrcRegs = 1;
447 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
448 newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
449 ctx->emit_instruction(ctx, &newInst);
450
451 /* MUL result.color.w, colorTemp, tmp0.w; */
452 newInst = tgsi_default_full_instruction();
453 newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
454 newInst.Instruction.NumDstRegs = 1;
455 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
456 newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
457 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
458 newInst.Instruction.NumSrcRegs = 2;
459 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
460 newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
461 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
462 newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0;
463 ctx->emit_instruction(ctx, &newInst);
464 }
465 else {
466 /* Not an END instruction.
467 * Look for writes to result.color and replace with colorTemp reg.
468 */
469 uint i;
470
471 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
472 struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
473 if (dst->DstRegister.File == TGSI_FILE_OUTPUT &&
474 dst->DstRegister.Index == aactx->colorOutput) {
475 dst->DstRegister.File = TGSI_FILE_TEMPORARY;
476 dst->DstRegister.Index = aactx->colorTemp;
477 }
478 }
479 }
480
481 ctx->emit_instruction(ctx, inst);
482 }
483
484
485 /**
486 * Generate the frag shader we'll use for drawing AA points.
487 * This will be the user's shader plus some texture/modulate instructions.
488 */
489 static boolean
490 generate_aapoint_fs(struct aapoint_stage *aapoint)
491 {
492 const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
493 struct pipe_shader_state aapoint_fs;
494 struct aa_transform_context transform;
495
496 #define MAX 1000
497
498 aapoint_fs = *orig_fs; /* copy to init */
499 aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
500 if (aapoint_fs.tokens == NULL)
501 return FALSE;
502
503 memset(&transform, 0, sizeof(transform));
504 transform.colorOutput = -1;
505 transform.maxInput = -1;
506 transform.maxGeneric = -1;
507 transform.colorTemp = -1;
508 transform.tmp0 = -1;
509 transform.firstInstruction = TRUE;
510 transform.base.transform_instruction = aa_transform_inst;
511 transform.base.transform_declaration = aa_transform_decl;
512
513 tgsi_transform_shader(orig_fs->tokens,
514 (struct tgsi_token *) aapoint_fs.tokens,
515 MAX, &transform.base);
516
517 #if 0 /* DEBUG */
518 printf("draw_aapoint, orig shader:\n");
519 tgsi_dump(orig_fs->tokens, 0);
520 printf("draw_aapoint, new shader:\n");
521 tgsi_dump(aapoint_fs.tokens, 0);
522 #endif
523
524 aapoint->fs->aapoint_fs
525 = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs);
526 if (aapoint->fs->aapoint_fs == NULL)
527 return FALSE;
528
529 aapoint->fs->generic_attrib = transform.maxGeneric + 1;
530
531 return TRUE;
532 }
533
534
535 /**
536 * When we're about to draw our first AA point in a batch, this function is
537 * called to tell the driver to bind our modified fragment shader.
538 */
539 static boolean
540 bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
541 {
542 struct draw_context *draw = aapoint->stage.draw;
543
544 if (!aapoint->fs->aapoint_fs &&
545 !generate_aapoint_fs(aapoint))
546 return FALSE;
547
548 draw->suspend_flushing = TRUE;
549 aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs);
550 draw->suspend_flushing = FALSE;
551
552 return TRUE;
553 }
554
555
556
557 static INLINE struct aapoint_stage *
558 aapoint_stage( struct draw_stage *stage )
559 {
560 return (struct aapoint_stage *) stage;
561 }
562
563
564
565
566 /**
567 * Draw an AA point by drawing a quad.
568 */
569 static void
570 aapoint_point(struct draw_stage *stage, struct prim_header *header)
571 {
572 const struct aapoint_stage *aapoint = aapoint_stage(stage);
573 struct prim_header tri;
574 struct vertex_header *v[4];
575 uint texPos = aapoint->tex_slot;
576 uint pos_slot = aapoint->pos_slot;
577 float radius, *pos, *tex;
578 uint i;
579 float k;
580
581 if (aapoint->psize_slot >= 0) {
582 radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
583 }
584 else {
585 radius = aapoint->radius;
586 }
587
588 /*
589 * Note: the texcoords (generic attrib, really) we use are special:
590 * The S and T components simply vary from -1 to +1.
591 * The R component is k, below.
592 * The Q component is 1.0 and will used as a handy constant in the
593 * fragment shader.
594 */
595
596 /*
597 * k is the threshold distance from the point's center at which
598 * we begin alpha attenuation (the coverage value).
599 * Operating within a unit circle, we'll compute the fragment's
600 * distance 'd' from the center point using the texcoords.
601 * IF d > 1.0 THEN
602 * KILL fragment
603 * ELSE IF d > k THEN
604 * compute coverage in [0,1] proportional to d in [k, 1].
605 * ELSE
606 * coverage = 1.0; // full coverage
607 * ENDIF
608 *
609 * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
610 * avoid using IF/ELSE/ENDIF TGSI opcodes.
611 */
612
613 #if !NORMALIZE
614 k = 1.0f / radius;
615 k = 1.0f - 2.0f * k + k * k;
616 #else
617 k = 1.0f - 1.0f / radius;
618 #endif
619
620 /* allocate/dup new verts */
621 for (i = 0; i < 4; i++) {
622 v[i] = dup_vert(stage, header->v[0], i);
623 }
624
625 /* new verts */
626 pos = v[0]->data[pos_slot];
627 pos[0] -= radius;
628 pos[1] -= radius;
629
630 pos = v[1]->data[pos_slot];
631 pos[0] += radius;
632 pos[1] -= radius;
633
634 pos = v[2]->data[pos_slot];
635 pos[0] += radius;
636 pos[1] += radius;
637
638 pos = v[3]->data[pos_slot];
639 pos[0] -= radius;
640 pos[1] += radius;
641
642 /* new texcoords */
643 tex = v[0]->data[texPos];
644 ASSIGN_4V(tex, -1, -1, k, 1);
645
646 tex = v[1]->data[texPos];
647 ASSIGN_4V(tex, 1, -1, k, 1);
648
649 tex = v[2]->data[texPos];
650 ASSIGN_4V(tex, 1, 1, k, 1);
651
652 tex = v[3]->data[texPos];
653 ASSIGN_4V(tex, -1, 1, k, 1);
654
655 /* emit 2 tris for the quad strip */
656 tri.v[0] = v[0];
657 tri.v[1] = v[1];
658 tri.v[2] = v[2];
659 stage->next->tri( stage->next, &tri );
660
661 tri.v[0] = v[0];
662 tri.v[1] = v[2];
663 tri.v[2] = v[3];
664 stage->next->tri( stage->next, &tri );
665 }
666
667
668 static void
669 aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
670 {
671 auto struct aapoint_stage *aapoint = aapoint_stage(stage);
672 struct draw_context *draw = stage->draw;
673
674 assert(draw->rasterizer->point_smooth);
675
676 if (draw->rasterizer->point_size <= 2.0)
677 aapoint->radius = 1.0;
678 else
679 aapoint->radius = 0.5f * draw->rasterizer->point_size;
680
681 /*
682 * Bind (generate) our fragprog.
683 */
684 bind_aapoint_fragment_shader(aapoint);
685
686 /* update vertex attrib info */
687 aapoint->tex_slot = draw->vs.num_vs_outputs;
688 assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
689
690 aapoint->pos_slot = draw->vs.position_output;
691
692 draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
693 draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib;
694 draw->extra_vp_outputs.slot = aapoint->tex_slot;
695
696 /* find psize slot in post-transform vertex */
697 aapoint->psize_slot = -1;
698 if (draw->rasterizer->point_size_per_vertex) {
699 /* find PSIZ vertex output */
700 const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
701 uint i;
702 for (i = 0; i < vs->info.num_outputs; i++) {
703 if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
704 aapoint->psize_slot = i;
705 break;
706 }
707 }
708 }
709
710 /* now really draw first point */
711 stage->point = aapoint_point;
712 stage->point(stage, header);
713 }
714
715
716 static void
717 aapoint_flush(struct draw_stage *stage, unsigned flags)
718 {
719 struct draw_context *draw = stage->draw;
720 struct aapoint_stage *aapoint = aapoint_stage(stage);
721 struct pipe_context *pipe = aapoint->pipe;
722
723 stage->point = aapoint_first_point;
724 stage->next->flush( stage->next, flags );
725
726 /* restore original frag shader */
727 draw->suspend_flushing = TRUE;
728 aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs);
729 draw->suspend_flushing = FALSE;
730
731 draw->extra_vp_outputs.slot = 0;
732 }
733
734
735 static void
736 aapoint_reset_stipple_counter(struct draw_stage *stage)
737 {
738 stage->next->reset_stipple_counter( stage->next );
739 }
740
741
742 static void
743 aapoint_destroy(struct draw_stage *stage)
744 {
745 draw_free_temp_verts( stage );
746 FREE( stage );
747 }
748
749
750 static struct aapoint_stage *
751 draw_aapoint_stage(struct draw_context *draw)
752 {
753 struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
754 if (aapoint == NULL)
755 goto fail;
756
757 if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
758 goto fail;
759
760 aapoint->stage.draw = draw;
761 aapoint->stage.next = NULL;
762 aapoint->stage.point = aapoint_first_point;
763 aapoint->stage.line = draw_pipe_passthrough_line;
764 aapoint->stage.tri = draw_pipe_passthrough_tri;
765 aapoint->stage.flush = aapoint_flush;
766 aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
767 aapoint->stage.destroy = aapoint_destroy;
768
769 return aapoint;
770
771 fail:
772 if (aapoint)
773 aapoint_destroy(&aapoint->stage);
774
775 return NULL;
776
777 }
778
779
780 static struct aapoint_stage *
781 aapoint_stage_from_pipe(struct pipe_context *pipe)
782 {
783 struct draw_context *draw = (struct draw_context *) pipe->draw;
784 return aapoint_stage(draw->pipeline.aapoint);
785 }
786
787
788 /**
789 * This function overrides the driver's create_fs_state() function and
790 * will typically be called by the state tracker.
791 */
792 static void *
793 aapoint_create_fs_state(struct pipe_context *pipe,
794 const struct pipe_shader_state *fs)
795 {
796 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
797 struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
798 if (aafs == NULL)
799 return NULL;
800
801 aafs->state = *fs;
802
803 /* pass-through */
804 aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs);
805
806 return aafs;
807 }
808
809
810 static void
811 aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
812 {
813 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
814 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
815 /* save current */
816 aapoint->fs = aafs;
817 /* pass-through */
818 aapoint->driver_bind_fs_state(aapoint->pipe,
819 (aafs ? aafs->driver_fs : NULL));
820 }
821
822
823 static void
824 aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
825 {
826 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
827 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
828 /* pass-through */
829 aapoint->driver_delete_fs_state(aapoint->pipe, aafs->driver_fs);
830 FREE(aafs);
831 }
832
833
834 /**
835 * Called by drivers that want to install this AA point prim stage
836 * into the draw module's pipeline. This will not be used if the
837 * hardware has native support for AA points.
838 */
839 boolean
840 draw_install_aapoint_stage(struct draw_context *draw,
841 struct pipe_context *pipe)
842 {
843 struct aapoint_stage *aapoint;
844
845 pipe->draw = (void *) draw;
846
847 /*
848 * Create / install AA point drawing / prim stage
849 */
850 aapoint = draw_aapoint_stage( draw );
851 if (aapoint == NULL)
852 goto fail;
853
854 aapoint->pipe = pipe;
855
856 /* save original driver functions */
857 aapoint->driver_create_fs_state = pipe->create_fs_state;
858 aapoint->driver_bind_fs_state = pipe->bind_fs_state;
859 aapoint->driver_delete_fs_state = pipe->delete_fs_state;
860
861 /* override the driver's functions */
862 pipe->create_fs_state = aapoint_create_fs_state;
863 pipe->bind_fs_state = aapoint_bind_fs_state;
864 pipe->delete_fs_state = aapoint_delete_fs_state;
865
866 draw->pipeline.aapoint = &aapoint->stage;
867
868 return TRUE;
869
870 fail:
871 if (aapoint)
872 aapoint->stage.destroy( &aapoint->stage );
873
874 return FALSE;
875 }