draw: share machine
author Keith Whitwell <keith@tungstengraphics.com>
Wed, 28 May 2008 23:17:53 +0000 (00:17 +0100)
committer Keith Whitwell <keith@tungstengraphics.com>
Wed, 28 May 2008 23:17:53 +0000 (00:17 +0100)
src/gallium/auxiliary/draw/Makefile
src/gallium/auxiliary/draw/SConscript
src/gallium/auxiliary/draw/draw_context.c
src/gallium/auxiliary/draw/draw_private.h
src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
src/gallium/auxiliary/draw/draw_vs.c
src/gallium/auxiliary/draw/draw_vs.h
src/gallium/auxiliary/draw/draw_vs_aos.c
src/gallium/auxiliary/draw/draw_vs_aos.h
src/gallium/auxiliary/draw/draw_vs_aos_machine.c [new file with mode: 0644]
src/gallium/auxiliary/draw/draw_vs_varient.c

index 9a88ecc07087ff532c75a3ba290297642626d130..f2e36a89e901b093f8316dfb72bcb99f1204867f 100644 (file)
@@ -37,6 +37,7 @@ C_SOURCES = \
        draw_vs_varient.c \
        draw_vs_aos.c \
        draw_vs_aos_io.c \
+       draw_vs_aos_machine.c \
        draw_vs_exec.c \
        draw_vs_llvm.c \
        draw_vs_sse.c 
index 26919a22982881a507b4d221a3cd4e553c63f83e..925e668f2226dbc331882642e5f2ce2d40583996 100644 (file)
@@ -36,6 +36,7 @@ draw = env.ConvenienceLibrary(
                'draw_vs.c',
                'draw_vs_aos.c',
                'draw_vs_aos_io.c',
+               'draw_vs_aos_machine.c',
                'draw_vs_exec.c',
                'draw_vs_llvm.c',
                'draw_vs_sse.c',
index 22420749656984be82f7f9788461c6d51d5bdd66..8509baf8654772360764b45c2724508c88b698b2 100644 (file)
@@ -174,6 +174,8 @@ void draw_set_viewport_state( struct draw_context *draw,
                               viewport->translate[1] == 0.0f &&
                               viewport->translate[2] == 0.0f &&
                               viewport->translate[3] == 0.0f);
+
+   draw_vs_set_viewport( draw, viewport );
 }
 
 
@@ -218,6 +220,7 @@ draw_set_mapped_constant_buffer(struct draw_context *draw,
                                 const void *buffer)
 {
    draw->pt.user.constants = buffer;
+   draw_vs_set_constants( draw, (const float (*)[4])buffer );
 }
 
 
index c095bf3d7b93f349c9b3cea36bdc5199b3eb3802..4cbccc8b5bb22462e3939082fdde29b94e5c9584 100644 (file)
@@ -183,6 +183,10 @@ struct draw_context
        */
       struct gallivm_cpu_engine *engine;   
 
+      /* Here's another one:
+       */
+      struct aos_machine *aos_machine; 
+
 
       struct translate *fetch;
       struct translate_cache *fetch_cache;
@@ -215,6 +219,11 @@ struct draw_context
 boolean draw_vs_init( struct draw_context *draw );
 void draw_vs_destroy( struct draw_context *draw );
 
+void draw_vs_set_viewport( struct draw_context *, 
+                           const struct pipe_viewport_state * );
+
+void draw_vs_set_constants( struct draw_context *,
+                            const float (*constants)[4] );
 
 
 
index 729c7db99995fefd4cb06d253acf395fddab5a12..5265a131605a35a26a87490c7cc2f24575e87731 100644 (file)
@@ -189,12 +189,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
                               draw->pt.vertex_buffer[buf].pitch );
    }
 
-   fse->active->set_constants( fse->active,
-                               (const float (*)[4])draw->pt.user.constants );
-
-   fse->active->set_viewport( fse->active,
-                              &draw->viewport );
-
    //return TRUE;
 }
 
index 9b899d404e19293ba6473b74053433af8c57d84e..a8b6d0c90d2500c3c8c0aecd9e980621ee374643 100644 (file)
 
 
 
+
+/* Hand a constant buffer to the AOS machine owned by the draw
+ * context's vertex-shader state.
+ */
+void draw_vs_set_constants( struct draw_context *draw,
+                            const float (*constants)[4] )
+{
+   struct aos_machine *shared = draw->vs.aos_machine;
+
+   draw_vs_aos_machine_constants( shared, constants );
+}
+
+
+/* Hand a viewport state to the AOS machine owned by the draw
+ * context's vertex-shader state.
+ */
+void draw_vs_set_viewport( struct draw_context *draw,
+                           const struct pipe_viewport_state *viewport )
+{
+   struct aos_machine *shared = draw->vs.aos_machine;
+
+   draw_vs_aos_machine_viewport( shared, viewport );
+}
+
+
+
 struct draw_vertex_shader *
 draw_create_vertex_shader(struct draw_context *draw,
                           const struct pipe_shader_state *shader)
@@ -83,6 +99,13 @@ void
 draw_delete_vertex_shader(struct draw_context *draw,
                           struct draw_vertex_shader *dvs)
 {
+   unsigned i;
+
+   for (i = 0; i < dvs->nr_varients; i++) 
+      dvs->varient[i]->destroy( dvs->varient[i] );
+
+   dvs->nr_varients = 0;
+
    dvs->delete( dvs );
 }
 
@@ -110,6 +133,10 @@ draw_vs_init( struct draw_context *draw )
    draw->vs.fetch_cache = translate_cache_create();
    if (!draw->vs.fetch_cache) 
       return FALSE;
+
+   draw->vs.aos_machine = draw_vs_aos_machine();
+   if (!draw->vs.aos_machine)
+      return FALSE;
       
    return TRUE;
 }
@@ -129,6 +156,9 @@ draw_vs_destroy( struct draw_context *draw )
    if (draw->vs.emit_cache)
       translate_cache_destroy(draw->vs.emit_cache);
 
+   if (draw->vs.aos_machine)
+      draw_vs_aos_machine_destroy(draw->vs.aos_machine);
+
    tgsi_exec_machine_free_data(&draw->vs.machine);
 
 }
@@ -153,10 +183,17 @@ draw_vs_lookup_varient( struct draw_vertex_shader *vs,
    if (varient == NULL)
       return NULL;
 
-   /* Add it to our list: 
+   /* Add it to our list, could be smarter
     */
-   assert(vs->nr_varients < Elements(vs->varient));
-   vs->varient[vs->nr_varients++] = varient;
+   if (vs->nr_varients < Elements(vs->varient)) {
+      vs->varient[vs->nr_varients++] = varient;
+   }
+   else {
+      vs->last_varient++;
+      vs->last_varient %= Elements(vs->varient);
+      vs->varient[vs->last_varient]->destroy(vs->varient[vs->last_varient]);
+      vs->varient[vs->last_varient] = varient;
+   }
 
    /* Done 
     */
index 7aa0415baf0f69c88f478cdc8e8eb6135fe0ca80..08c6de8ba86218e6f03c5146896e222f4df4a277 100644 (file)
@@ -70,16 +70,6 @@ struct draw_vs_varient_key {
 
 struct draw_vs_varient;
 
-typedef void (PIPE_CDECL *vsv_run_elts_func)( struct draw_vs_varient *,
-                                              const unsigned *elts,
-                                              unsigned count,
-                                              void *output_buffer);
-
-typedef void (PIPE_CDECL *vsv_run_linear_func)( struct draw_vs_varient *,
-                                                unsigned start,
-                                                unsigned count,
-                                                void *output_buffer);
-
 
 struct draw_vs_varient {
    struct draw_vs_varient_key key;
@@ -91,12 +81,6 @@ struct draw_vs_varient {
                       const void *ptr,
                       unsigned stride );
 
-   void (*set_constants)( struct draw_vs_varient *,
-                          const float (*constants)[4] );
-
-   void (*set_viewport)( struct draw_vs_varient *,
-                         const struct pipe_viewport_state * );
-
    void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader,
                                   unsigned start,
                                   unsigned count,
@@ -131,6 +115,7 @@ struct draw_vertex_shader {
     */
    struct draw_vs_varient *varient[16];
    unsigned nr_varients;
+   unsigned last_varient;
    struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader,
                                               const struct draw_vs_varient_key *key );
 
@@ -217,7 +202,14 @@ static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key
 }
 
 
+struct aos_machine *draw_vs_aos_machine( void );
+void draw_vs_aos_machine_destroy( struct aos_machine *machine );
+
+void draw_vs_aos_machine_constants( struct aos_machine *machine,
+                                    const float (*constants)[4] );
 
+void draw_vs_aos_machine_viewport( struct aos_machine *machine,
+                                   const struct pipe_viewport_state *viewport );
 
 
 #define MAX_TGSI_VERTICES 4
index 9056785e7a611cf986bac1bbded20f7481830836..b5e4e1e7b1a37640b3a75378cb439b256c4805fd 100644 (file)
@@ -149,70 +149,7 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
 #define X87_CW_ROUND_MASK             (3<<10)
 #define X87_CW_INFINITY               (1<<12)
 
-static void do_populate_lut( struct shine_tab *tab,
-                             float unclamped_exponent )
-{
-   const float epsilon = 1.0F / 256.0F;    
-   float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon));
-   unsigned i;
 
-   tab->exponent = unclamped_exponent; /* for later comparison */
-   
-   tab->values[0] = 0;
-   if (exponent == 0) {
-      for (i = 1; i < 258; i++) {
-         tab->values[i] = 1.0;
-      }      
-   }
-   else {
-      for (i = 1; i < 258; i++) {
-         tab->values[i] = powf((float)i * epsilon, exponent);
-      }
-   }
-}
-
-static void init_internals( struct aos_machine *machine )
-{
-   unsigned i;
-   float inv = 1.0f/255.0f;
-   float f255 = 255.0f;
-
-   ASSIGN_4V(machine->internal[IMM_SWZ],       1.0f,  -1.0f,  0.0f, 1.0f);
-   *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff;
-
-   ASSIGN_4V(machine->internal[IMM_ONES],      1.0f,  1.0f,  1.0f,  1.0f);
-   ASSIGN_4V(machine->internal[IMM_NEGS],     -1.0f, -1.0f, -1.0f, -1.0f);
-   ASSIGN_4V(machine->internal[IMM_IDENTITY],  0.0f,  0.0f,  0.0f,  1.0f);
-   ASSIGN_4V(machine->internal[IMM_INV_255],   inv,   inv,   inv,   inv);
-   ASSIGN_4V(machine->internal[IMM_255],       f255,  f255,  f255,  f255);
-   ASSIGN_4V(machine->internal[IMM_RSQ],       -.5f,  1.5f,  0.0f,  0.0f);
-
-
-   machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
-                               X87_CW_EXCEPTION_DENORM_OP |
-                               X87_CW_EXCEPTION_ZERO_DIVIDE |
-                               X87_CW_EXCEPTION_OVERFLOW |
-                               X87_CW_EXCEPTION_UNDERFLOW |
-                               X87_CW_EXCEPTION_PRECISION |
-                               (1<<6) |
-                               X87_CW_ROUND_NEAREST |
-                               X87_CW_PRECISION_DOUBLE_EXT);
-
-   assert(machine->fpu_rnd_nearest == 0x37f);
-                               
-   machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP |
-                               X87_CW_EXCEPTION_DENORM_OP |
-                               X87_CW_EXCEPTION_ZERO_DIVIDE |
-                               X87_CW_EXCEPTION_OVERFLOW |
-                               X87_CW_EXCEPTION_UNDERFLOW |
-                               X87_CW_EXCEPTION_PRECISION |
-                               (1<<6) |
-                               X87_CW_ROUND_DOWN |
-                               X87_CW_PRECISION_DOUBLE_EXT);
-
-   for (i = 0; i < MAX_SHINE_TAB; i++)
-      do_populate_lut( &machine->shine_tab[i], 1.0f );
-}
 
 
 static void spill( struct aos_compilation *cp, unsigned idx )
@@ -1220,136 +1157,6 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst
    return TRUE;
 }
 
-static void PIPE_CDECL do_lit( struct aos_machine *machine,
-                               float *result,
-                               const float *in,
-                               unsigned count )
-{
-   if (in[0] > 0) 
-   {
-      if (in[1] <= 0.0) 
-      {
-         result[0] = 1.0F;
-         result[1] = in[0];
-         result[2] = 1.0;
-         result[3] = 1.0F;
-      }
-      else
-      {
-         const float epsilon = 1.0F / 256.0F;    
-         float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
-         result[0] = 1.0F;
-         result[1] = in[0];
-         result[2] = powf(in[1], exponent);
-         result[3] = 1.0;
-      }
-   }
-   else 
-   {
-      result[0] = 1.0F;
-      result[1] = 0.0;
-      result[2] = 0.0;
-      result[3] = 1.0F;
-   }
-}
-
-
-static void PIPE_CDECL do_lit_lut( struct aos_machine *machine,
-                                   float *result,
-                                   const float *in,
-                                   unsigned count )
-{
-   if (in[0] > 0) 
-   {
-      if (in[1] <= 0.0) 
-      {
-         result[0] = 1.0F;
-         result[1] = in[0];
-         result[2] = 1.0;
-         result[3] = 1.0F;
-         return;
-      }
-      
-      if (machine->lit_info[count].shine_tab->exponent != in[3]) {
-         machine->lit_info[count].func = do_lit;
-         goto no_luck;
-      }
-
-      if (in[1] <= 1.0)
-      {
-         const float *tab = machine->lit_info[count].shine_tab->values;
-         float f = in[1] * 256;
-         int k = (int)f;
-         float frac = f - (float)k;
-         
-         result[0] = 1.0F;
-         result[1] = in[0];
-         result[2] = tab[k] + frac*(tab[k+1]-tab[k]);
-         result[3] = 1.0;
-         return;
-      }
-      
-   no_luck:
-      {
-         const float epsilon = 1.0F / 256.0F;    
-         float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
-         result[0] = 1.0F;
-         result[1] = in[0];
-         result[2] = powf(in[1], exponent);
-         result[3] = 1.0;
-      }
-   }
-   else 
-   {
-      result[0] = 1.0F;
-      result[1] = 0.0;
-      result[2] = 0.0;
-      result[3] = 1.0F;
-   }
-}
-
-
-
-static void PIPE_CDECL populate_lut( struct aos_machine *machine,
-                                     float *result,
-                                     const float *in,
-                                     unsigned count )
-{
-   unsigned i, tab;
-
-   /* Search for an existing table for this value.  Note that without
-    * static analysis we don't really know if in[3] will be constant,
-    * but it usually is...
-    */
-   for (tab = 0; tab < 4; tab++) {
-      if (machine->shine_tab[tab].exponent == in[3]) {
-         goto found;
-      }
-   }
-
-   for (tab = 0, i = 1; i < 4; i++) {
-      if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
-         tab = i;
-   }
-
-   if (machine->shine_tab[tab].last_used == machine->now) {
-      /* No unused tables (this is not a ffvertex program...).  Just
-       * call pow each time:
-       */
-      machine->lit_info[count].func = do_lit;
-      machine->lit_info[count].func( machine, result, in, count );
-      return;
-   }
-   else {
-      do_populate_lut( &machine->shine_tab[tab], in[3] );
-   }
-
- found:
-   machine->shine_tab[tab].last_used = machine->now;
-   machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
-   machine->lit_info[count].func = do_lit_lut;
-   machine->lit_info[count].func( machine, result, in, count );
-}
 
 
 
@@ -1413,7 +1220,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
                                              Offset(struct lit_info, func)));
    }
    else {
-      x86_mov_reg_imm( cp->func, ecx, (int)do_lit );
+      x86_mov_reg_imm( cp->func, ecx, (int)aos_do_lit );
    }
 
    x86_call( cp->func, ecx );
@@ -1434,7 +1241,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
    return TRUE;
 }
 
-   
+#if 0   
 static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
    struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); 
@@ -1495,6 +1302,7 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu
 
    return TRUE;
 }
+#endif
 
 
 
@@ -1945,7 +1753,7 @@ static void find_last_write_outputs( struct aos_compilation *cp )
 }
 
 
-#define ARG_VARIENT    1
+#define ARG_MACHINE    1
 #define ARG_START_ELTS 2
 #define ARG_COUNT      3
 #define ARG_OUTBUF     4
@@ -1985,7 +1793,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
 
    /* Load arguments into regs:
     */
-   x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_VARIENT));
+   x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_MACHINE));
    x86_mov(cp.func, cp.idx_EBX, x86_fn_arg(cp.func, ARG_START_ELTS));
    x86_mov(cp.func, cp.count_ESI, x86_fn_arg(cp.func, ARG_COUNT));
    x86_mov(cp.func, cp.outbuf_ECX, x86_fn_arg(cp.func, ARG_OUTBUF));
@@ -1997,11 +1805,6 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
    x86_cmp(cp.func, cp.count_ESI, cp.tmp_EAX);
    fixup = x86_jcc_forward(cp.func, cc_E);
 
-   /* Dig out the machine pointer from inside the varient arg 
-    */
-   x86_mov(cp.func, cp.machine_EDX, 
-           x86_make_disp(cp.machine_EDX,
-                         Offset( struct draw_vs_varient_aos_sse, machine )));
 
    save_fpu_state( &cp );
    set_fpu_round_nearest( &cp );
@@ -2151,13 +1954,14 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
                                       void *output_buffer )
 {
    struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+   struct aos_machine *machine = vaos->draw->vs.aos_machine;
 
-   vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
-   vaos->machine->constants = vaos->draw->pt.user.constants;
-   vaos->machine->immediates = vaos->base.vs->immediates;
-   vaos->machine->attrib = vaos->attrib;
+   machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
+   machine->constants = (const float (*)[4])vaos->draw->pt.user.constants;
+   machine->immediates = vaos->base.vs->immediates;
+   machine->attrib = vaos->attrib;
 
-   vaos->gen_run_elts( varient,
+   vaos->gen_run_elts( machine,
                        elts,
                        count,
                        output_buffer );
@@ -2169,61 +1973,25 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
                                         void *output_buffer )
 {
    struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+   struct aos_machine *machine = vaos->draw->vs.aos_machine;
 
-   vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
-   vaos->machine->constants = vaos->draw->pt.user.constants;
-   vaos->machine->immediates = vaos->base.vs->immediates;
-   vaos->machine->attrib = vaos->attrib;
+   machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
+   machine->constants = (const float (*)[4])vaos->draw->pt.user.constants;
+   machine->immediates = vaos->base.vs->immediates;
+   machine->attrib = vaos->attrib;
 
-   vaos->gen_run_linear( varient,
+   vaos->gen_run_linear( machine,
                          start,
                          count,
                          output_buffer );
 }
 
 
-static void vaos_set_constants( struct draw_vs_varient *varient,
-                                const float (*constants)[4] )
-{
-   struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
-
-#if 0
-   unsigned i;
-   for (i =0; i < vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1; i++)
-      debug_printf("state %d: %f %f %f %f\n",
-                   i, 
-                   constants[i][0],
-                   constants[i][1],
-                   constants[i][2],
-                   constants[i][3]);
-#endif
-
-   {
-      unsigned i;
-      for (i = 0; i < MAX_LIT_INFO; i++) {
-         vaos->machine->lit_info[i].func = populate_lut;
-         vaos->machine->now++;
-      }
-   }
-}
-
-
-static void vaos_set_viewport( struct draw_vs_varient *varient,
-                               const struct pipe_viewport_state *viewport )
-{
-   struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
-
-   memcpy(vaos->machine->scale, viewport->scale, 4 * sizeof(float));
-   memcpy(vaos->machine->translate, viewport->translate, 4 * sizeof(float));
-}
 
 static void vaos_destroy( struct draw_vs_varient *varient )
 {
    struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
 
-   if (vaos->machine)
-      align_free( vaos->machine );
-
    FREE( vaos->attrib );
 
    x86_release_func( &vaos->func[0] );
@@ -2245,8 +2013,6 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
    vaos->base.key = *key;
    vaos->base.vs = vs;
    vaos->base.set_input = vaos_set_buffer;
-   vaos->base.set_constants = vaos_set_constants;
-   vaos->base.set_viewport = vaos_set_viewport;
    vaos->base.destroy = vaos_destroy;
    vaos->base.run_linear = vaos_run_linear;
    vaos->base.run_elts = vaos_run_elts;
@@ -2257,13 +2023,6 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
    if (!vaos->attrib)
       goto fail;
 
-   vaos->machine = align_malloc( sizeof(struct aos_machine), 16 );
-   if (!vaos->machine)
-      goto fail;
-   
-   memset(vaos->machine, 0, sizeof(struct aos_machine));
-   init_internals(vaos->machine);
-
    tgsi_dump(vs->state.tokens, 0);
 
    if (!build_vertex_program( vaos, TRUE ))
@@ -2272,11 +2031,11 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
    if (!build_vertex_program( vaos, FALSE ))
       goto fail;
 
-   vaos->gen_run_linear = (vsv_run_linear_func)x86_get_func(&vaos->func[0]);
+   vaos->gen_run_linear = (vaos_run_linear_func)x86_get_func(&vaos->func[0]);
    if (!vaos->gen_run_linear)
       goto fail;
 
-   vaos->gen_run_elts = (vsv_run_elts_func)x86_get_func(&vaos->func[1]);
+   vaos->gen_run_elts = (vaos_run_elts_func)x86_get_func(&vaos->func[1]);
    if (!vaos->gen_run_elts)
       goto fail;
 
@@ -2286,9 +2045,6 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
    if (vaos && vaos->attrib)
       FREE(vaos->attrib);
 
-   if (vaos && vaos->machine)
-      align_free( vaos->machine );
-
    if (vaos)
       x86_release_func( &vaos->func[0] );
 
index 295d2cb3fe58c322aa475662186a568510ea19a9..89a9174151de5fa13356fcf0ba953e9203494881 100644 (file)
@@ -60,10 +60,16 @@ struct x86_function;
 #define FPU_RND_NEAREST 2
 
 struct aos_machine;
-typedef void (PIPE_CDECL *lit_func)( struct aos_machine *,
+typedef void PIPE_CDECL (*lit_func)( struct aos_machine *,
                                     float *result,
                                     const float *in,
                                     unsigned count );
+
+PIPE_CDECL void aos_do_lit( struct aos_machine *machine,
+                            float *result,
+                            const float *in,
+                            unsigned count );
+
 struct shine_tab {
    float exponent;
    float values[258];
@@ -207,16 +213,25 @@ struct x86_reg aos_get_x86( struct aos_compilation *cp,
                             unsigned value );
 
 
+typedef void (PIPE_CDECL *vaos_run_elts_func)( struct aos_machine *,
+                                               const unsigned *elts,
+                                               unsigned count,
+                                               void *output_buffer);
+
+typedef void (PIPE_CDECL *vaos_run_linear_func)( struct aos_machine *,
+                                                unsigned start,
+                                                unsigned count,
+                                                void *output_buffer);
+
+
 struct draw_vs_varient_aos_sse {
    struct draw_vs_varient base;
    struct draw_context *draw;
 
    struct aos_attrib *attrib;
 
-   struct aos_machine *machine; /* XXX: temporarily unshared */
-
-   vsv_run_linear_func gen_run_linear;
-   vsv_run_elts_func gen_run_elts;
+   vaos_run_linear_func gen_run_linear;
+   vaos_run_elts_func gen_run_elts;
 
 
    struct x86_function func[2];
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c
new file mode 100644 (file)
index 0000000..53e999b
--- /dev/null
@@ -0,0 +1,297 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_util.h"
+#include "tgsi/exec/tgsi_exec.h"
+#include "draw_vs.h"
+#include "draw_vs_aos.h"
+#include "draw_vertex.h"
+
+#include "rtasm/rtasm_x86sse.h"
+
+
+#define X87_CW_EXCEPTION_INV_OP       (1<<0)
+#define X87_CW_EXCEPTION_DENORM_OP    (1<<1)
+#define X87_CW_EXCEPTION_ZERO_DIVIDE  (1<<2)
+#define X87_CW_EXCEPTION_OVERFLOW     (1<<3)
+#define X87_CW_EXCEPTION_UNDERFLOW    (1<<4)
+#define X87_CW_EXCEPTION_PRECISION    (1<<5)
+#define X87_CW_PRECISION_SINGLE       (0<<8)
+#define X87_CW_PRECISION_RESERVED     (1<<8)
+#define X87_CW_PRECISION_DOUBLE       (2<<8)
+#define X87_CW_PRECISION_DOUBLE_EXT   (3<<8)
+#define X87_CW_PRECISION_MASK         (3<<8)
+#define X87_CW_ROUND_NEAREST          (0<<10)
+#define X87_CW_ROUND_DOWN             (1<<10)
+#define X87_CW_ROUND_UP               (2<<10)
+#define X87_CW_ROUND_ZERO             (3<<10)
+#define X87_CW_ROUND_MASK             (3<<10)
+#define X87_CW_INFINITY               (1<<12)
+
+
+/* Out-of-line LIT helper that always evaluates the power term with
+ * powf().
+ *
+ * machine and count are not used; they are only present so that the
+ * function has the lit_func signature.
+ *
+ * result[0] and result[3] are always 1.0.  When in[0] > 0, result[1]
+ * is in[0] and result[2] is in[1] raised to in[3], with the exponent
+ * clamped to +/-(128 - 1/256); a non-positive in[1] gives
+ * result[2] = 0.  Otherwise result[1] and result[2] are both 0.
+ */
+PIPE_CDECL void aos_do_lit( struct aos_machine *machine,
+                            float *result,
+                            const float *in,
+                            unsigned count )
+{
+   if (in[0] > 0)
+   {
+      if (in[1] <= 0.0)
+      {
+         /* Non-positive base: the power term is zero, the same value
+          * the lookup tables hold in entry 0.  Returning 1.0 here
+          * made the result jump to full intensity at in[1] == 0.
+          */
+         result[0] = 1.0F;
+         result[1] = in[0];
+         result[2] = 0.0;
+         result[3] = 1.0F;
+      }
+      else
+      {
+         const float epsilon = 1.0F / 256.0F;
+         float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
+         result[0] = 1.0F;
+         result[1] = in[0];
+         result[2] = powf(in[1], exponent);
+         result[3] = 1.0;
+      }
+   }
+   else
+   {
+      result[0] = 1.0F;
+      result[1] = 0.0;
+      result[2] = 0.0;
+      result[3] = 1.0F;
+   }
+}
+
+
+/* LIT helper that takes the power term from the shine table attached
+ * to lit_info[count] instead of calling powf().
+ *
+ * result[0] and result[3] are always 1.0.  When in[0] <= 0 the
+ * remaining two components are 0.  Otherwise result[1] is in[0] and
+ * result[2] is:
+ *   - 0 when in[1] <= 0;
+ *   - a linear interpolation between two neighbouring table entries
+ *     when the table's exponent equals in[3] and in[1] <= 1;
+ *   - powf(in[1], clamped in[3]) in every other case.
+ *
+ * If the table was built for a different exponent than in[3], the
+ * slot's function pointer is switched to aos_do_lit so later calls go
+ * straight to powf().
+ */
+static PIPE_CDECL void do_lit_lut( struct aos_machine *machine,
+                                   float *result,
+                                   const float *in,
+                                   unsigned count )
+{
+   if (in[0] > 0)
+   {
+      if (in[1] <= 0.0)
+      {
+         /* Non-positive base: the power term is zero, the same value
+          * the table holds in entry 0.  Returning 1.0 here made the
+          * result jump to full intensity at in[1] == 0.
+          */
+         result[0] = 1.0F;
+         result[1] = in[0];
+         result[2] = 0.0;
+         result[3] = 1.0F;
+         return;
+      }
+
+      if (machine->lit_info[count].shine_tab->exponent != in[3]) {
+         /* Table is stale for this exponent: stop using it for this slot.
+          */
+         machine->lit_info[count].func = aos_do_lit;
+         goto no_luck;
+      }
+
+      if (in[1] <= 1.0)
+      {
+         /* Table entry i holds pow(i/256, exponent); interpolate
+          * between entries k and k+1.
+          */
+         const float *tab = machine->lit_info[count].shine_tab->values;
+         float f = in[1] * 256;
+         int k = (int)f;
+         float frac = f - (float)k;
+
+         result[0] = 1.0F;
+         result[1] = in[0];
+         result[2] = tab[k] + frac*(tab[k+1]-tab[k]);
+         result[3] = 1.0;
+         return;
+      }
+
+   no_luck:
+      {
+         const float epsilon = 1.0F / 256.0F;
+         float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
+         result[0] = 1.0F;
+         result[1] = in[0];
+         result[2] = powf(in[1], exponent);
+         result[3] = 1.0;
+      }
+   }
+   else
+   {
+      result[0] = 1.0F;
+      result[1] = 0.0;
+      result[2] = 0.0;
+      result[3] = 1.0F;
+   }
+}
+
+
+/* Fill a shine table with pow(i/256, exponent) for entries 1..257.
+ *
+ * The exponent is clamped to +/-(128 - 1/256) for the computation,
+ * but the unclamped value is what gets stored in tab->exponent.
+ * Entry 0 is always zero, and a clamped exponent of zero fills every
+ * other entry with 1.0.
+ */
+static void do_populate_lut( struct shine_tab *tab,
+                             float unclamped_exponent )
+{
+   const float step = 1.0F / 256.0F;
+   const float limit = 128.0F - step;
+   float power = CLAMP(unclamped_exponent, -limit, limit);
+   unsigned idx;
+
+   /* Stored unclamped so it can be compared against later inputs.
+    */
+   tab->exponent = unclamped_exponent;
+   tab->values[0] = 0;
+
+   for (idx = 1; idx < 258; idx++) {
+      if (power == 0)
+         tab->values[idx] = 1.0;
+      else
+         tab->values[idx] = powf((float)idx * step, power);
+   }
+}
+
+
+
+
+/* First-call LIT handler for a lit_info slot: choose how later LIT
+ * calls for this slot are evaluated, then evaluate the current one.
+ *
+ * It looks for a shine table already built for exponent in[3].  If
+ * there is none, the table with the smallest last_used stamp is
+ * rebuilt for in[3], unless even that table was already used at the
+ * current machine->now, in which case the slot falls back to
+ * aos_do_lit.  On success the slot is bound to the table and switched
+ * to do_lit_lut.
+ *
+ * result, in and count have the same meaning as for aos_do_lit; count
+ * selects the lit_info slot.
+ */
+static void PIPE_CDECL populate_lut( struct aos_machine *machine,
+                                     float *result,
+                                     const float *in,
+                                     unsigned count )
+{
+   unsigned i, tab;
+
+   /* Search for an existing table for this value.  Note that without
+    * static analysis we don't really know if in[3] will be constant,
+    * but it usually is...
+    *
+    * The bound is MAX_SHINE_TAB, the same one used when the tables
+    * are initialised, rather than a literal count.
+    */
+   for (tab = 0; tab < MAX_SHINE_TAB; tab++) {
+      if (machine->shine_tab[tab].exponent == in[3]) {
+         goto found;
+      }
+   }
+
+   /* No match: pick the table with the oldest last_used stamp.
+    */
+   for (tab = 0, i = 1; i < MAX_SHINE_TAB; i++) {
+      if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
+         tab = i;
+   }
+
+   if (machine->shine_tab[tab].last_used == machine->now) {
+      /* No unused tables (this is not a ffvertex program...).  Just
+       * call pow each time:
+       */
+      machine->lit_info[count].func = aos_do_lit;
+      machine->lit_info[count].func( machine, result, in, count );
+      return;
+   }
+   else {
+      do_populate_lut( &machine->shine_tab[tab], in[3] );
+   }
+
+ found:
+   machine->shine_tab[tab].last_used = machine->now;
+   machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
+   machine->lit_info[count].func = do_lit_lut;
+   machine->lit_info[count].func( machine, result, in, count );
+}
+
+
+/* Record a new constant buffer pointer on the machine.
+ *
+ * Every lit_info slot is pointed back at populate_lut, and
+ * machine->now is advanced once per slot.
+ */
+void draw_vs_aos_machine_constants( struct aos_machine *machine,
+                                    const float (*constants)[4] )
+{
+   unsigned slot = 0;
+
+   machine->constants = constants;
+
+   while (slot < MAX_LIT_INFO) {
+      machine->lit_info[slot].func = populate_lut;
+      machine->now++;
+      slot++;
+   }
+}
+
+
+/* Copy the four scale and four translate floats of the viewport into
+ * the machine.
+ */
+void draw_vs_aos_machine_viewport( struct aos_machine *machine,
+                                   const struct pipe_viewport_state *viewport )
+{
+   memcpy(machine->translate, viewport->translate, sizeof(float[4]));
+   memcpy(machine->scale, viewport->scale, sizeof(float[4]));
+}
+
+
+
+/* Release a machine with align_free().
+ *
+ * NOTE(review): there is no NULL check here; whether align_free()
+ * accepts NULL is not visible from this file -- confirm before
+ * calling with a NULL machine.
+ */
+void draw_vs_aos_machine_destroy( struct aos_machine *machine )
+{
+   align_free(machine);
+}
+
+/* Allocate and initialise an aos_machine.
+ *
+ * The structure is allocated with 16-byte alignment and zeroed, then
+ * the internal immediates, the two x87 control words and the shine
+ * tables are filled in.
+ *
+ * Returns the new machine, or NULL if the allocation fails.
+ */
+struct aos_machine *draw_vs_aos_machine( void )
+{
+   struct aos_machine *machine;
+   unsigned i;
+   float inv = 1.0f/255.0f;
+   float f255 = 255.0f;
+
+   machine = align_malloc(sizeof(struct aos_machine), 16);
+   if (!machine)
+      return NULL;
+
+   memset(machine, 0, sizeof(*machine));
+
+   /* The last component of IMM_SWZ is assigned 1.0f and then
+    * overwritten with an all-ones bit pattern, so the order of these
+    * two statements matters.
+    */
+   ASSIGN_4V(machine->internal[IMM_SWZ],       1.0f,  -1.0f,  0.0f, 1.0f);
+   *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff;
+
+   ASSIGN_4V(machine->internal[IMM_ONES],      1.0f,  1.0f,  1.0f,  1.0f);
+   ASSIGN_4V(machine->internal[IMM_NEGS],     -1.0f, -1.0f, -1.0f, -1.0f);
+   ASSIGN_4V(machine->internal[IMM_IDENTITY],  0.0f,  0.0f,  0.0f,  1.0f);
+   ASSIGN_4V(machine->internal[IMM_INV_255],   inv,   inv,   inv,   inv);
+   ASSIGN_4V(machine->internal[IMM_255],       f255,  f255,  f255,  f255);
+   ASSIGN_4V(machine->internal[IMM_RSQ],       -.5f,  1.5f,  0.0f,  0.0f);
+
+
+   /* x87 control word: all six exception bits set, extended
+    * precision, round to nearest.
+    */
+   machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
+                               X87_CW_EXCEPTION_DENORM_OP |
+                               X87_CW_EXCEPTION_ZERO_DIVIDE |
+                               X87_CW_EXCEPTION_OVERFLOW |
+                               X87_CW_EXCEPTION_UNDERFLOW |
+                               X87_CW_EXCEPTION_PRECISION |
+                               (1<<6) |
+                               X87_CW_ROUND_NEAREST |
+                               X87_CW_PRECISION_DOUBLE_EXT);
+
+   assert(machine->fpu_rnd_nearest == 0x37f);
+                               
+   /* Same control word, differing only in the rounding field
+    * (X87_CW_ROUND_DOWN).
+    */
+   machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP |
+                               X87_CW_EXCEPTION_DENORM_OP |
+                               X87_CW_EXCEPTION_ZERO_DIVIDE |
+                               X87_CW_EXCEPTION_OVERFLOW |
+                               X87_CW_EXCEPTION_UNDERFLOW |
+                               X87_CW_EXCEPTION_PRECISION |
+                               (1<<6) |
+                               X87_CW_ROUND_DOWN |
+                               X87_CW_PRECISION_DOUBLE_EXT);
+
+   /* Start every shine table off populated for exponent 1.0.
+    */
+   for (i = 0; i < MAX_SHINE_TAB; i++)
+      do_populate_lut( &machine->shine_tab[i], 1.0f );
+
+   return machine;
+}
+
+
index 784ae41205f92f9d8821aed2bc7add40a097f0ef..18cb06e3742abde70c6614c0a9854b2a33841c90 100644 (file)
@@ -44,8 +44,6 @@
 struct draw_vs_varient_generic {
    struct draw_vs_varient base;
 
-   struct pipe_viewport_state viewport;
-
    struct draw_vertex_shader *shader;
    struct draw_context *draw;
    
@@ -57,21 +55,11 @@ struct draw_vs_varient_generic {
     */
    struct translate *fetch;
    struct translate *emit;
-
-   const float (*constants)[4];
 };
 
 
 
 
-static void vsvg_set_constants( struct draw_vs_varient *varient,
-                                const float (*constants)[4] )
-{
-   struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
-
-   vsvg->constants = constants;
-}
-
 
 static void vsvg_set_input( struct draw_vs_varient *varient,
                             unsigned buffer,
@@ -94,8 +82,8 @@ static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg,
                              void *output_buffer )
 {
    char *ptr = (char *)output_buffer;
-   const float *scale = vsvg->viewport.scale;
-   const float *trans = vsvg->viewport.translate;
+   const float *scale = vsvg->base.vs->draw->viewport.scale;
+   const float *trans = vsvg->base.vs->draw->viewport.translate;
    unsigned stride = vsvg->base.key.output_stride;
    unsigned j;
 
@@ -115,8 +103,8 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg,
                              void *output_buffer )
 {
    char *ptr = (char *)output_buffer;
-   const float *scale = vsvg->viewport.scale;
-   const float *trans = vsvg->viewport.translate;
+   const float *scale = vsvg->base.vs->draw->viewport.scale;
+   const float *trans = vsvg->base.vs->draw->viewport.translate;
    unsigned stride = vsvg->base.key.output_stride;
    unsigned j;
 
@@ -130,10 +118,10 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg,
 }
                          
 
-static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
-                                      const unsigned *elts,
-                                      unsigned count,
-                                      void *output_buffer )
+static void vsvg_run_elts( struct draw_vs_varient *varient,
+                           const unsigned *elts,
+                           unsigned count,
+                           void *output_buffer)
 {
    struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
                        
@@ -150,7 +138,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
       vsvg->base.vs->run_linear( vsvg->base.vs, 
                                  output_buffer,
                                  output_buffer,
-                                 vsvg->constants,
+                                 (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
                                  count,
                                  vsvg->base.key.output_stride, 
                                  vsvg->base.key.output_stride);
@@ -186,10 +174,10 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
 }
 
 
-static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
-                                        unsigned start,
-                                        unsigned count,
-                                        void *output_buffer )
+static void vsvg_run_linear( struct draw_vs_varient *varient,
+                                   unsigned start,
+                                   unsigned count,
+                                   void *output_buffer )
 {
    struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
        
@@ -206,7 +194,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
       vsvg->base.vs->run_linear( vsvg->base.vs, 
                                  output_buffer,
                                  output_buffer,
-                                 vsvg->constants,
+                                 (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
                                  count,
                                  vsvg->base.key.output_stride, 
                                  vsvg->base.key.output_stride);
@@ -245,13 +233,6 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
 
 
 
-static void vsvg_set_viewport( struct draw_vs_varient *varient,
-                               const struct pipe_viewport_state *viewport )
-{
-   struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
-
-   vsvg->viewport = *viewport;
-}
 
 static void vsvg_destroy( struct draw_vs_varient *varient )
 {
@@ -272,8 +253,6 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
    vsvg->base.key = *key;
    vsvg->base.vs = vs;
    vsvg->base.set_input     = vsvg_set_input;
-   vsvg->base.set_constants = vsvg_set_constants;
-   vsvg->base.set_viewport  = vsvg_set_viewport;
    vsvg->base.run_elts      = vsvg_run_elts;
    vsvg->base.run_linear    = vsvg_run_linear;
    vsvg->base.destroy       = vsvg_destroy;