draw: associate rhw divide with clipping not viewport flag
[mesa.git] / src / gallium / auxiliary / draw / draw_aapoint.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * AA point stage: AA points are converted to quads and rendered with a
30 * special fragment shader. Another approach would be to use a texture
31 * map image of a point, but experiments indicate the quality isn't nearly
32 * as good as this approach.
33 *
34 * Note: this looks a lot like draw_aaline.c but there's actually little
35 * if any code that can be shared.
36 *
37 * Authors: Brian Paul
38 */
39
40
41 #include "pipe/p_util.h"
42 #include "pipe/p_inlines.h"
43 #include "pipe/p_context.h"
44 #include "pipe/p_defines.h"
45 #include "pipe/p_shader_tokens.h"
46
47 #include "tgsi/util/tgsi_transform.h"
48 #include "tgsi/util/tgsi_dump.h"
49
50 #include "draw_context.h"
51 #include "draw_private.h"
52
53
54 /*
55 * Enabling NORMALIZE might give _slightly_ better results.
56 * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
57 * d=x*x+y*y. Since we're working with a unit circle, the latter seems
58 * close enough and saves some costly instructions.
59 */
60 #define NORMALIZE 0
61
62
63 /**
64 * Subclass of pipe_shader_state to carry extra fragment shader info.
65 */
66 struct aapoint_fragment_shader
67 {
68 struct pipe_shader_state state;
69 void *driver_fs; /**< the regular shader */
70 void *aapoint_fs; /**< the aa point-augmented shader */
71 int generic_attrib; /**< The generic input attrib/texcoord we'll use */
72 };
73
74
75 /**
76 * Subclass of draw_stage
77 */
78 struct aapoint_stage
79 {
80 struct draw_stage stage;
81
82 int psize_slot;
83 float radius;
84
85 /** this is the vertex attrib slot for the new texcoords */
86 uint tex_slot;
87
88 /*
89 * Currently bound state
90 */
91 struct aapoint_fragment_shader *fs;
92
93 /*
94 * Driver interface/override functions
95 */
96 void * (*driver_create_fs_state)(struct pipe_context *,
97 const struct pipe_shader_state *);
98 void (*driver_bind_fs_state)(struct pipe_context *, void *);
99 void (*driver_delete_fs_state)(struct pipe_context *, void *);
100
101 struct pipe_context *pipe;
102 };
103
104
105
106 /**
107 * Subclass of tgsi_transform_context, used for transforming the
108 * user's fragment shader to add the special AA instructions.
109 */
110 struct aa_transform_context {
111 struct tgsi_transform_context base;
112 uint tempsUsed; /**< bitmask */
113 int colorOutput; /**< which output is the primary color */
114 int maxInput, maxGeneric; /**< max input index found */
115 int tmp0, colorTemp; /**< temp registers */
116 boolean firstInstruction;
117 };
118
119
120 /**
121 * TGSI declaration transform callback.
122 * Look for two free temp regs and available input reg for new texcoords.
123 */
124 static void
125 aa_transform_decl(struct tgsi_transform_context *ctx,
126 struct tgsi_full_declaration *decl)
127 {
128 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
129
130 if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
131 decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR &&
132 decl->Semantic.SemanticIndex == 0) {
133 aactx->colorOutput = decl->u.DeclarationRange.First;
134 }
135 else if (decl->Declaration.File == TGSI_FILE_INPUT) {
136 if ((int) decl->u.DeclarationRange.Last > aactx->maxInput)
137 aactx->maxInput = decl->u.DeclarationRange.Last;
138 if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC &&
139 (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) {
140 aactx->maxGeneric = decl->Semantic.SemanticIndex;
141 }
142 }
143 else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
144 uint i;
145 for (i = decl->u.DeclarationRange.First;
146 i <= decl->u.DeclarationRange.Last; i++) {
147 aactx->tempsUsed |= (1 << i);
148 }
149 }
150
151 ctx->emit_declaration(ctx, decl);
152 }
153
154
155 /**
156 * TGSI instruction transform callback.
157 * Replace writes to result.color w/ a temp reg.
158 * Upon END instruction, insert texture sampling code for antialiasing.
159 */
160 static void
161 aa_transform_inst(struct tgsi_transform_context *ctx,
162 struct tgsi_full_instruction *inst)
163 {
164 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
165 struct tgsi_full_instruction newInst;
166
167 if (aactx->firstInstruction) {
168 /* emit our new declarations before the first instruction */
169
170 struct tgsi_full_declaration decl;
171 const int texInput = aactx->maxInput + 1;
172 int tmp0;
173 uint i;
174
175 /* find two free temp regs */
176 for (i = 0; i < 32; i++) {
177 if ((aactx->tempsUsed & (1 << i)) == 0) {
178 /* found a free temp */
179 if (aactx->tmp0 < 0)
180 aactx->tmp0 = i;
181 else if (aactx->colorTemp < 0)
182 aactx->colorTemp = i;
183 else
184 break;
185 }
186 }
187
188 assert(aactx->colorTemp != aactx->tmp0);
189
190 tmp0 = aactx->tmp0;
191
192 /* declare new generic input/texcoord */
193 decl = tgsi_default_full_declaration();
194 decl.Declaration.File = TGSI_FILE_INPUT;
195 decl.Declaration.Semantic = 1;
196 decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
197 decl.Semantic.SemanticIndex = aactx->maxGeneric + 1;
198 decl.Declaration.Interpolate = 1;
199 /* XXX this could be linear... */
200 decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
201 decl.u.DeclarationRange.First =
202 decl.u.DeclarationRange.Last = texInput;
203 ctx->emit_declaration(ctx, &decl);
204
205 /* declare new temp regs */
206 decl = tgsi_default_full_declaration();
207 decl.Declaration.File = TGSI_FILE_TEMPORARY;
208 decl.u.DeclarationRange.First =
209 decl.u.DeclarationRange.Last = tmp0;
210 ctx->emit_declaration(ctx, &decl);
211
212 decl = tgsi_default_full_declaration();
213 decl.Declaration.File = TGSI_FILE_TEMPORARY;
214 decl.u.DeclarationRange.First =
215 decl.u.DeclarationRange.Last = aactx->colorTemp;
216 ctx->emit_declaration(ctx, &decl);
217
218 aactx->firstInstruction = FALSE;
219
220
221 /*
222 * Emit code to compute fragment coverage, kill if outside point radius
223 *
224 * Temp reg0 usage:
225 * t0.x = distance of fragment from center point
226 * t0.y = boolean, is t0.x > 1.0, also misc temp usage
227 * t0.z = temporary for computing 1/(1-k) value
228 * t0.w = final coverage value
229 */
230
231 /* MUL t0.xy, tex, tex; # compute x^2, y^2 */
232 newInst = tgsi_default_full_instruction();
233 newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
234 newInst.Instruction.NumDstRegs = 1;
235 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
236 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
237 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY;
238 newInst.Instruction.NumSrcRegs = 2;
239 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
240 newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
241 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
242 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
243 ctx->emit_instruction(ctx, &newInst);
244
245 /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */
246 newInst = tgsi_default_full_instruction();
247 newInst.Instruction.Opcode = TGSI_OPCODE_ADD;
248 newInst.Instruction.NumDstRegs = 1;
249 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
250 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
251 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
252 newInst.Instruction.NumSrcRegs = 2;
253 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
254 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
255 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
256 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
257 newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
258 newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
259 ctx->emit_instruction(ctx, &newInst);
260
261 #if NORMALIZE /* OPTIONAL normalization of length */
262 /* RSQ t0.x, t0.x; */
263 newInst = tgsi_default_full_instruction();
264 newInst.Instruction.Opcode = TGSI_OPCODE_RSQ;
265 newInst.Instruction.NumDstRegs = 1;
266 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
267 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
268 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
269 newInst.Instruction.NumSrcRegs = 1;
270 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
271 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
272 ctx->emit_instruction(ctx, &newInst);
273
274 /* RCP t0.x, t0.x; */
275 newInst = tgsi_default_full_instruction();
276 newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
277 newInst.Instruction.NumDstRegs = 1;
278 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
279 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
280 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
281 newInst.Instruction.NumSrcRegs = 1;
282 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
283 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
284 ctx->emit_instruction(ctx, &newInst);
285 #endif
286
287 /* SGT t0.y, t0.xxxx, t0.wwww; # bool b = d > 1 (NOTE t0.w == 1) */
288 newInst = tgsi_default_full_instruction();
289 newInst.Instruction.Opcode = TGSI_OPCODE_SGT;
290 newInst.Instruction.NumDstRegs = 1;
291 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
292 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
293 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
294 newInst.Instruction.NumSrcRegs = 2;
295 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
296 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
297 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
298 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
299 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
300 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
301 ctx->emit_instruction(ctx, &newInst);
302
303 /* KILP -t0.yyyy; # if b, KILL */
304 newInst = tgsi_default_full_instruction();
305 newInst.Instruction.Opcode = TGSI_OPCODE_KILP;
306 newInst.Instruction.NumDstRegs = 0;
307 newInst.Instruction.NumSrcRegs = 1;
308 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
309 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
310 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
311 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
312 newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
313 newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
314 newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
315 ctx->emit_instruction(ctx, &newInst);
316
317
318 /* compute coverage factor = (1-d)/(1-k) */
319
320 /* SUB t0.z, tex.w, tex.z; # m = 1 - k */
321 newInst = tgsi_default_full_instruction();
322 newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
323 newInst.Instruction.NumDstRegs = 1;
324 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
325 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
326 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
327 newInst.Instruction.NumSrcRegs = 2;
328 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
329 newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
330 newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
331 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
332 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
333 newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
334 ctx->emit_instruction(ctx, &newInst);
335
336 /* RCP t0.z, t0.z; # t0.z = 1 / m */
337 newInst = tgsi_default_full_instruction();
338 newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
339 newInst.Instruction.NumDstRegs = 1;
340 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
341 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
342 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
343 newInst.Instruction.NumSrcRegs = 1;
344 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
345 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
346 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z;
347 ctx->emit_instruction(ctx, &newInst);
348
349 /* SUB t0.y, 1, t0.x; # d = 1 - d */
350 newInst = tgsi_default_full_instruction();
351 newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
352 newInst.Instruction.NumDstRegs = 1;
353 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
354 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
355 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
356 newInst.Instruction.NumSrcRegs = 2;
357 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
358 newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
359 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
360 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
361 newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
362 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
363 ctx->emit_instruction(ctx, &newInst);
364
365 /* MUL t0.w, t0.y, t0.z; # coverage = d * m */
366 newInst = tgsi_default_full_instruction();
367 newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
368 newInst.Instruction.NumDstRegs = 1;
369 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
370 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
371 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
372 newInst.Instruction.NumSrcRegs = 2;
373 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
374 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
375 newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
376 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
377 newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
378 newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z;
379 ctx->emit_instruction(ctx, &newInst);
380
381 /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */
382 newInst = tgsi_default_full_instruction();
383 newInst.Instruction.Opcode = TGSI_OPCODE_SLE;
384 newInst.Instruction.NumDstRegs = 1;
385 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
386 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
387 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
388 newInst.Instruction.NumSrcRegs = 2;
389 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
390 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
391 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
392 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
393 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
394 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z;
395 ctx->emit_instruction(ctx, &newInst);
396
397 /* CMP t0.w, -t0.y, tex.w, t0.w;
398 * # if -t0.y < 0 then
399 * t0.w = 1
400 * else
401 * t0.w = t0.w
402 */
403 newInst = tgsi_default_full_instruction();
404 newInst.Instruction.Opcode = TGSI_OPCODE_CMP;
405 newInst.Instruction.NumDstRegs = 1;
406 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
407 newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
408 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
409 newInst.Instruction.NumSrcRegs = 3;
410 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
411 newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
412 newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
413 newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
414 newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
415 newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
416 newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
417 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
418 newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
419 newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
420 newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
421 newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
422 newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
423 newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
424 newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0;
425 newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
426 newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
427 newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
428 newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
429 ctx->emit_instruction(ctx, &newInst);
430
431 }
432
433 if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
434 /* add alpha modulation code at tail of program */
435
436 /* MOV result.color.xyz, colorTemp; */
437 newInst = tgsi_default_full_instruction();
438 newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
439 newInst.Instruction.NumDstRegs = 1;
440 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
441 newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
442 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ;
443 newInst.Instruction.NumSrcRegs = 1;
444 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
445 newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
446 ctx->emit_instruction(ctx, &newInst);
447
448 /* MUL result.color.w, colorTemp, tmp0.w; */
449 newInst = tgsi_default_full_instruction();
450 newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
451 newInst.Instruction.NumDstRegs = 1;
452 newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
453 newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
454 newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
455 newInst.Instruction.NumSrcRegs = 2;
456 newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
457 newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
458 newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
459 newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0;
460 ctx->emit_instruction(ctx, &newInst);
461 }
462 else {
463 /* Not an END instruction.
464 * Look for writes to result.color and replace with colorTemp reg.
465 */
466 uint i;
467
468 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
469 struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
470 if (dst->DstRegister.File == TGSI_FILE_OUTPUT &&
471 dst->DstRegister.Index == aactx->colorOutput) {
472 dst->DstRegister.File = TGSI_FILE_TEMPORARY;
473 dst->DstRegister.Index = aactx->colorTemp;
474 }
475 }
476 }
477
478 ctx->emit_instruction(ctx, inst);
479 }
480
481
482 /**
483 * Generate the frag shader we'll use for drawing AA lines.
484 * This will be the user's shader plus some texture/modulate instructions.
485 */
486 static void
487 generate_aapoint_fs(struct aapoint_stage *aapoint)
488 {
489 const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
490 struct pipe_shader_state aapoint_fs;
491 struct aa_transform_context transform;
492
493 #define MAX 1000
494
495 aapoint_fs = *orig_fs; /* copy to init */
496 aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX);
497
498 memset(&transform, 0, sizeof(transform));
499 transform.colorOutput = -1;
500 transform.maxInput = -1;
501 transform.maxGeneric = -1;
502 transform.colorTemp = -1;
503 transform.tmp0 = -1;
504 transform.firstInstruction = TRUE;
505 transform.base.transform_instruction = aa_transform_inst;
506 transform.base.transform_declaration = aa_transform_decl;
507
508 tgsi_transform_shader(orig_fs->tokens,
509 (struct tgsi_token *) aapoint_fs.tokens,
510 MAX, &transform.base);
511
512 #if 0 /* DEBUG */
513 printf("draw_aapoint, orig shader:\n");
514 tgsi_dump(orig_fs->tokens, 0);
515 printf("draw_aapoint, new shader:\n");
516 tgsi_dump(aapoint_fs.tokens, 0);
517 #endif
518
519 aapoint->fs->aapoint_fs
520 = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs);
521
522 aapoint->fs->generic_attrib = transform.maxGeneric + 1;
523 }
524
525
526 /**
527 * When we're about to draw our first AA line in a batch, this function is
528 * called to tell the driver to bind our modified fragment shader.
529 */
530 static void
531 bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
532 {
533 if (!aapoint->fs->aapoint_fs) {
534 generate_aapoint_fs(aapoint);
535 }
536 aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs);
537 }
538
539
540
541 static INLINE struct aapoint_stage *
542 aapoint_stage( struct draw_stage *stage )
543 {
544 return (struct aapoint_stage *) stage;
545 }
546
547
548 static void
549 passthrough_line(struct draw_stage *stage, struct prim_header *header)
550 {
551 stage->next->line(stage->next, header);
552 }
553
554
555 static void
556 passthrough_tri(struct draw_stage *stage, struct prim_header *header)
557 {
558 stage->next->tri(stage->next, header);
559 }
560
561
562 /**
563 * Draw an AA point by drawing a quad.
564 */
565 static void
566 aapoint_point(struct draw_stage *stage, struct prim_header *header)
567 {
568 const struct aapoint_stage *aapoint = aapoint_stage(stage);
569 struct prim_header tri;
570 struct vertex_header *v[4];
571 uint texPos = aapoint->tex_slot;
572 float radius, *pos, *tex;
573 uint i;
574 float k;
575
576 if (aapoint->psize_slot >= 0) {
577 radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
578 }
579 else {
580 radius = aapoint->radius;
581 }
582
583 /*
584 * Note: the texcoords (generic attrib, really) we use are special:
585 * The S and T components simply vary from -1 to +1.
586 * The R component is k, below.
587 * The Q component is 1.0 and will used as a handy constant in the
588 * fragment shader.
589 */
590
591 /*
592 * k is the threshold distance from the point's center at which
593 * we begin alpha attenuation (the coverage value).
594 * Operating within a unit circle, we'll compute the fragment's
595 * distance 'd' from the center point using the texcoords.
596 * IF d > 1.0 THEN
597 * KILL fragment
598 * ELSE IF d > k THEN
599 * compute coverage in [0,1] proportional to d in [k, 1].
600 * ELSE
601 * coverage = 1.0; // full coverage
602 * ENDIF
603 *
604 * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
605 * avoid using IF/ELSE/ENDIF TGSI opcodes.
606 */
607
608 #if !NORMALIZE
609 k = 1.0f / radius;
610 k = 1.0f - 2.0f * k + k * k;
611 #else
612 k = 1.0f - 1.0f / radius;
613 #endif
614
615 /* allocate/dup new verts */
616 for (i = 0; i < 4; i++) {
617 v[i] = dup_vert(stage, header->v[0], i);
618 }
619
620 /* new verts */
621 pos = v[0]->data[0];
622 pos[0] -= radius;
623 pos[1] -= radius;
624
625 pos = v[1]->data[0];
626 pos[0] += radius;
627 pos[1] -= radius;
628
629 pos = v[2]->data[0];
630 pos[0] += radius;
631 pos[1] += radius;
632
633 pos = v[3]->data[0];
634 pos[0] -= radius;
635 pos[1] += radius;
636
637 /* new texcoords */
638 tex = v[0]->data[texPos];
639 ASSIGN_4V(tex, -1, -1, k, 1);
640
641 tex = v[1]->data[texPos];
642 ASSIGN_4V(tex, 1, -1, k, 1);
643
644 tex = v[2]->data[texPos];
645 ASSIGN_4V(tex, 1, 1, k, 1);
646
647 tex = v[3]->data[texPos];
648 ASSIGN_4V(tex, -1, 1, k, 1);
649
650 /* emit 2 tris for the quad strip */
651 tri.v[0] = v[0];
652 tri.v[1] = v[1];
653 tri.v[2] = v[2];
654 stage->next->tri( stage->next, &tri );
655
656 tri.v[0] = v[0];
657 tri.v[1] = v[2];
658 tri.v[2] = v[3];
659 stage->next->tri( stage->next, &tri );
660 }
661
662
663 static void
664 aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
665 {
666 auto struct aapoint_stage *aapoint = aapoint_stage(stage);
667 struct draw_context *draw = stage->draw;
668
669 assert(draw->rasterizer->point_smooth);
670
671 if (draw->rasterizer->point_size <= 2.0)
672 aapoint->radius = 1.0;
673 else
674 aapoint->radius = 0.5f * draw->rasterizer->point_size;
675
676 /*
677 * Bind (generate) our fragprog.
678 */
679 bind_aapoint_fragment_shader(aapoint);
680
681 /* update vertex attrib info */
682 aapoint->tex_slot = draw->num_vs_outputs;
683 assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
684
685 draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
686 draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib;
687 draw->extra_vp_outputs.slot = aapoint->tex_slot;
688
689 /* find psize slot in post-transform vertex */
690 aapoint->psize_slot = -1;
691 if (draw->rasterizer->point_size_per_vertex) {
692 /* find PSIZ vertex output */
693 const struct draw_vertex_shader *vs = draw->vertex_shader;
694 uint i;
695 for (i = 0; i < vs->info.num_outputs; i++) {
696 if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
697 aapoint->psize_slot = i;
698 break;
699 }
700 }
701 }
702
703 /* now really draw first line */
704 stage->point = aapoint_point;
705 stage->point(stage, header);
706 }
707
708
709 static void
710 aapoint_flush(struct draw_stage *stage, unsigned flags)
711 {
712 struct draw_context *draw = stage->draw;
713 struct aapoint_stage *aapoint = aapoint_stage(stage);
714 struct pipe_context *pipe = aapoint->pipe;
715
716 stage->point = aapoint_first_point;
717 stage->next->flush( stage->next, flags );
718
719 /* restore original frag shader */
720 aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs);
721
722 draw->extra_vp_outputs.slot = 0;
723 }
724
725
726 static void
727 aapoint_reset_stipple_counter(struct draw_stage *stage)
728 {
729 stage->next->reset_stipple_counter( stage->next );
730 }
731
732
/**
 * Destroy the stage: free its temporary vertices, then the stage itself.
 */
static void
aapoint_destroy(struct draw_stage *stage)
{
   /* the temp verts must go first, while the stage is still valid */
   draw_free_temp_verts(stage);

   FREE(stage);
}
739
740
741 static struct aapoint_stage *
742 draw_aapoint_stage(struct draw_context *draw)
743 {
744 struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
745
746 draw_alloc_temp_verts( &aapoint->stage, 4 );
747
748 aapoint->stage.draw = draw;
749 aapoint->stage.next = NULL;
750 aapoint->stage.point = aapoint_first_point;
751 aapoint->stage.line = passthrough_line;
752 aapoint->stage.tri = passthrough_tri;
753 aapoint->stage.flush = aapoint_flush;
754 aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
755 aapoint->stage.destroy = aapoint_destroy;
756
757 return aapoint;
758 }
759
760
761 static struct aapoint_stage *
762 aapoint_stage_from_pipe(struct pipe_context *pipe)
763 {
764 struct draw_context *draw = (struct draw_context *) pipe->draw;
765 return aapoint_stage(draw->pipeline.aapoint);
766 }
767
768
769 /**
770 * This function overrides the driver's create_fs_state() function and
771 * will typically be called by the state tracker.
772 */
773 static void *
774 aapoint_create_fs_state(struct pipe_context *pipe,
775 const struct pipe_shader_state *fs)
776 {
777 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
778 struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
779
780 if (aafs) {
781 aafs->state = *fs;
782
783 /* pass-through */
784 aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs);
785 }
786
787 return aafs;
788 }
789
790
791 static void
792 aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
793 {
794 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
795 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
796 /* save current */
797 aapoint->fs = aafs;
798 /* pass-through */
799 aapoint->driver_bind_fs_state(aapoint->pipe,
800 (aafs ? aafs->driver_fs : NULL));
801 }
802
803
804 static void
805 aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
806 {
807 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
808 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
809 /* pass-through */
810 aapoint->driver_delete_fs_state(aapoint->pipe, aafs->driver_fs);
811 FREE(aafs);
812 }
813
814
815 /**
816 * Called by drivers that want to install this AA point prim stage
817 * into the draw module's pipeline. This will not be used if the
818 * hardware has native support for AA points.
819 */
820 void
821 draw_install_aapoint_stage(struct draw_context *draw,
822 struct pipe_context *pipe)
823 {
824 struct aapoint_stage *aapoint;
825
826 pipe->draw = (void *) draw;
827
828 /*
829 * Create / install AA point drawing / prim stage
830 */
831 aapoint = draw_aapoint_stage( draw );
832 assert(aapoint);
833 draw->pipeline.aapoint = &aapoint->stage;
834
835 aapoint->pipe = pipe;
836
837 /* save original driver functions */
838 aapoint->driver_create_fs_state = pipe->create_fs_state;
839 aapoint->driver_bind_fs_state = pipe->bind_fs_state;
840 aapoint->driver_delete_fs_state = pipe->delete_fs_state;
841
842 /* override the driver's functions */
843 pipe->create_fs_state = aapoint_create_fs_state;
844 pipe->bind_fs_state = aapoint_bind_fs_state;
845 pipe->delete_fs_state = aapoint_delete_fs_state;
846 }