Squashed commit of the following:
[mesa.git] / src / gallium / drivers / r300 / r300_state_derived.c
index 7166694edf46646662f9568c5cd9195c546ef89b..ae54d06372f6fc8a38c94fec0815ba51a8d835f1 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -28,6 +29,7 @@
 #include "r300_context.h"
 #include "r300_fs.h"
 #include "r300_screen.h"
+#include "r300_shader_semantics.h"
 #include "r300_state_derived.h"
 #include "r300_state_inlines.h"
 #include "r300_vs.h"
 /* r300_state_derived: Various bits of state which are dependent upon
  * currently bound CSO data. */
 
-struct r300_shader_key {
-    struct r300_vertex_shader* vs;
-    struct r300_fragment_shader* fs;
-};
-
-struct r300_shader_derived_value {
-    struct r300_vertex_format* vformat;
-    struct r300_rs_block* rs_block;
-};
-
-unsigned r300_shader_key_hash(void* key) {
-    struct r300_shader_key* shader_key = (struct r300_shader_key*)key;
-    unsigned vs = (unsigned)shader_key->vs;
-    unsigned fs = (unsigned)shader_key->fs;
-
-    return (vs << 16) | (fs & 0xffff);
-}
-
-int r300_shader_key_compare(void* key1, void* key2) {
-    struct r300_shader_key* shader_key1 = (struct r300_shader_key*)key1;
-    struct r300_shader_key* shader_key2 = (struct r300_shader_key*)key2;
-
-    return (shader_key1->vs == shader_key2->vs) &&
-        (shader_key1->fs == shader_key2->fs);
+static void r300_draw_emit_attrib(struct r300_context* r300,
+                                  enum attrib_emit emit,
+                                  enum interp_mode interp,
+                                  int index)
+{
+    struct r300_vertex_shader* vs = r300->vs_state.state;
+    struct tgsi_shader_info* info = &vs->info;
+    int output;
+
+    output = draw_find_shader_output(r300->draw,
+                                     info->output_semantic_name[index],
+                                     info->output_semantic_index[index]);
+    draw_emit_vertex_attr(&r300->vertex_info, emit, interp, output);
 }
 
-/* Set up the vs_tab and routes. */
-static void r300_vs_tab_routes(struct r300_context* r300,
-                               struct r300_vertex_info* vformat)
+static void r300_draw_emit_all_attribs(struct r300_context* r300)
 {
-    struct r300_screen* r300screen = r300_screen(r300->context.screen);
-    struct vertex_info* vinfo = &vformat->vinfo;
-    int* tab = vformat->vs_tab;
-    boolean pos = FALSE, psize = FALSE, fog = FALSE;
-    int i, texs = 0, cols = 0;
-    struct tgsi_shader_info* info;
-
-    if (r300screen->caps->has_tcl) {
-        /* Use vertex shader to determine required routes. */
-        info = &r300->vs->info;
-    } else {
-        /* Use fragment shader to determine required routes. */
-        info = &r300->fs->info;
-    }
-
-    assert(info->num_inputs <= 16);
-
-    if (!r300screen->caps->has_tcl || !r300->rs_state->enable_vte)
-    {
-        for (i = 0; i < info->num_inputs; i++) {
-            switch (r300->vs->code.inputs[i]) {
-                case TGSI_SEMANTIC_POSITION:
-                    pos = TRUE;
-                    tab[i] = 0;
-                    break;
-                case TGSI_SEMANTIC_COLOR:
-                    tab[i] = 2 + cols;
-                    cols++;
-                    break;
-                case TGSI_SEMANTIC_PSIZE:
-                    assert(psize == FALSE);
-                    psize = TRUE;
-                    tab[i] = 15;
-                    break;
-                case TGSI_SEMANTIC_FOG:
-                    assert(fog == FALSE);
-                    fog = TRUE;
-                    /* Fall through */
-                case TGSI_SEMANTIC_GENERIC:
-                    tab[i] = 6 + texs;
-                    texs++;
-                    break;
-                default:
-                    debug_printf("r300: Unknown vertex input %d\n",
-                        info->input_semantic_name[i]);
-                    break;
-            }
-        }
-    }
-    else
-    {
-        /* Just copy vert attribs over as-is. */
-        for (i = 0; i < info->num_inputs; i++) {
-            tab[i] = i;
-        }
-
-        for (i = 0; i < info->num_outputs; i++) {
-            switch (info->output_semantic_name[i]) {
-                case TGSI_SEMANTIC_POSITION:
-                    pos = TRUE;
-                    break;
-                case TGSI_SEMANTIC_COLOR:
-                    cols++;
-                    break;
-                case TGSI_SEMANTIC_PSIZE:
-                    psize = TRUE;
-                    break;
-                case TGSI_SEMANTIC_FOG:
-                    fog = TRUE;
-                    /* Fall through */
-                case TGSI_SEMANTIC_GENERIC:
-                    texs++;
-                    break;
-                default:
-                    debug_printf("r300: Unknown vertex output %d\n",
-                        info->output_semantic_name[i]);
-                    break;
-            }
-        }
-    }
-
-    /* XXX magic */
-    assert(texs <= 8);
-
-    /* Do the actual vertex_info setup.
-     *
-     * vertex_info has four uints of hardware-specific data in it.
-     * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL
-     * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM
-     * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0
-     * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */
-
-    vinfo->hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */
-
-    /* We need to add vertex position attribute only for SW TCL case,
-     * for HW TCL case it could be generated by vertex shader */
-    if (!pos && !r300screen->caps->has_tcl) {
-        debug_printf("r300: Forcing vertex position attribute emit...\n");
-        /* Make room for the position attribute
-         * at the beginning of the tab. */
-        for (i = 15; i > 0; i--) {
-            tab[i] = tab[i-1];
-        }
-        tab[0] = 0;
-    }
+    struct r300_vertex_shader* vs = r300->vs_state.state;
+    struct r300_shader_semantics* vs_outputs = &vs->outputs;
+    int i, gen_count;
 
     /* Position. */
-    if (r300->draw) {
-        draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
-            draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0));
+    if (vs_outputs->pos != ATTR_UNUSED) {
+        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+                              vs_outputs->pos);
+    } else {
+        assert(0);
     }
-    vinfo->hwfmt[1] |= R300_INPUT_CNTL_POS;
-    vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
 
     /* Point size. */
-    if (psize) {
-        if (r300->draw) {
-            draw_emit_vertex_attr(vinfo, EMIT_1F_PSIZE, INTERP_POS,
-                draw_find_vs_output(r300->draw, TGSI_SEMANTIC_PSIZE, 0));
-        }
-        vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+    if (vs_outputs->psize != ATTR_UNUSED) {
+        r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS,
+                              vs_outputs->psize);
     }
 
     /* Colors. */
-    for (i = 0; i < cols; i++) {
-        if (r300->draw) {
-            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR,
-                draw_find_vs_output(r300->draw, TGSI_SEMANTIC_COLOR, i));
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        if (vs_outputs->color[i] != ATTR_UNUSED) {
+            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
+                                  vs_outputs->color[i]);
         }
-        vinfo->hwfmt[1] |= R300_INPUT_CNTL_COLOR;
-        vinfo->hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i);
     }
 
-    /* Init i right here, increment it if fog is enabled.
-     * This gets around a double-increment problem. */
-    i = 0;
+    /* XXX Back-face colors. */
 
-    /* Fog. This is a special-cased texcoord. */
-    if (fog) {
-        i++;
-        if (r300->draw) {
-            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
-                draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0));
+    /* Texture coordinates. */
+    gen_count = 0;
+    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+        if (vs_outputs->generic[i] != ATTR_UNUSED) {
+            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+                                  vs_outputs->generic[i]);
+            gen_count++;
         }
-        vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i);
-        vinfo->hwfmt[3] |= (4 << (3 * i));
     }
 
-    /* Texcoords. */
-    for (; i < texs; i++) {
-        if (r300->draw) {
-            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
-                draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i));
-        }
-        vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i);
-        vinfo->hwfmt[3] |= (4 << (3 * i));
+    /* Fog coordinates. */
+    if (vs_outputs->fog != ATTR_UNUSED) {
+        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+                              vs_outputs->fog);
+        gen_count++;
     }
 
-    draw_compute_vertex_size(vinfo);
+    assert(gen_count <= 8);
 }
 
-/* Update the PSC tables. */
-static void r300_vertex_psc(struct r300_context* r300,
-                            struct r300_vertex_info* vformat)
+/* Update the PSC tables for SW TCL, using Draw. */
+static void r300_swtcl_vertex_psc(struct r300_context *r300)
 {
-    struct r300_screen* r300screen = r300_screen(r300->context.screen);
-    struct vertex_info* vinfo = &vformat->vinfo;
-    int* tab = vformat->vs_tab;
+    struct r300_vertex_stream_state *vstream = r300->vertex_stream_state.state;
+    struct r300_vertex_shader* vs = r300->vs_state.state;
+    struct vertex_info* vinfo = &r300->vertex_info;
     uint16_t type, swizzle;
     enum pipe_format format;
     unsigned i, attrib_count;
+    int* vs_output_tab = vs->stream_loc_notcl;
 
-    /* Vertex shaders have no semantics on their inputs,
-     * so PSC should just route stuff based on their info,
-     * and not on attrib information. */
-    if (r300screen->caps->has_tcl) {
-        attrib_count = r300->vs->info.num_inputs;
-        DBG(r300, DBG_DRAW, "r300: routing %d attribs in psc for vs\n",
-                attrib_count);
-    } else {
-        attrib_count = vinfo->num_attribs;
-        DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
-        for (i = 0; i < attrib_count; i++) {
-            DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
-                   " tab %d\n", vinfo->attrib[i].src_index,
-                   vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
-                   tab[i]);
-        }
-    }
+    /* XXX hax */
+    memset(vstream, 0, sizeof(struct r300_vertex_stream_state));
 
+    /* For each Draw attribute, route it to the fragment shader according
+     * to the vs_output_tab. */
+    attrib_count = vinfo->num_attribs;
+    DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
     for (i = 0; i < attrib_count; i++) {
+        DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
+               " vs_output_tab %d\n", vinfo->attrib[i].src_index,
+               vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
+               vs_output_tab[i]);
+
         /* Make sure we have a proper destination for our attribute. */
-        assert(tab[i] != -1);
+        assert(vs_output_tab[i] != -1);
 
         format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
 
         /* Obtain the type of data in this attribute. */
         type = r300_translate_vertex_data_type(format) |
-            tab[i] << R300_DST_VEC_LOC_SHIFT;
+            vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
 
         /* Obtain the swizzle for this attribute. Note that the default
          * swizzle in the hardware is not XYZW! */
@@ -271,13 +141,11 @@ static void r300_vertex_psc(struct r300_context* r300,
 
         /* Add the attribute to the PSC table. */
         if (i & 1) {
-            vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
-
-            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+            vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
         } else {
-            vformat->vap_prog_stream_cntl[i >> 1] |= type <<  0;
-
-            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 0;
+            vstream->vap_prog_stream_cntl[i >> 1] |= type;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
         }
     }
 
@@ -285,242 +153,259 @@ static void r300_vertex_psc(struct r300_context* r300,
     if (i) {
         i -= 1;
     }
-    vformat->vap_prog_stream_cntl[i >> 1] |=
+    vstream->vap_prog_stream_cntl[i >> 1] |=
         (R300_LAST_VEC << (i & 1 ? 16 : 0));
+
+    vstream->count = (i >> 1) + 1;
+    r300->vertex_stream_state.dirty = TRUE;
+    r300->vertex_stream_state.size = (1 + vstream->count) * 2;
 }
 
-/* Set up the mappings from GB to US, for RS block. */
-static void r300_update_fs_tab(struct r300_context* r300,
-                               struct r300_vertex_info* vformat)
+static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
+                        boolean swizzle_0001)
 {
-    struct tgsi_shader_info* info = &r300->fs->info;
-    int i, cols = 0, texs = 0, cols_emitted = 0;
-    int* tab = vformat->fs_tab;
+    rs->ip[id] |= R300_RS_COL_PTR(ptr);
+    if (swizzle_0001) {
+        rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
+    } else {
+        rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
+    }
+    rs->inst[id] |= R300_RS_INST_COL_ID(id);
+}
 
-    for (i = 0; i < 16; i++) {
-        tab[i] = -1;
+static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
+{
+    rs->inst[id] |= R300_RS_INST_COL_CN_WRITE |
+                    R300_RS_INST_COL_ADDR(fp_offset);
+}
+
+static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr,
+                        boolean swizzle_X001)
+{
+    if (swizzle_X001) {
+        rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
+                      R300_RS_SEL_S(R300_RS_SEL_C0) |
+                      R300_RS_SEL_T(R300_RS_SEL_K0) |
+                      R300_RS_SEL_R(R300_RS_SEL_K0) |
+                      R300_RS_SEL_Q(R300_RS_SEL_K1);
+    } else {
+        rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
+                      R300_RS_SEL_S(R300_RS_SEL_C0) |
+                      R300_RS_SEL_T(R300_RS_SEL_C1) |
+                      R300_RS_SEL_R(R300_RS_SEL_C2) |
+                      R300_RS_SEL_Q(R300_RS_SEL_C3);
     }
+    rs->inst[id] |= R300_RS_INST_TEX_ID(id);
+}
 
-    assert(info->num_inputs <= 16);
-    for (i = 0; i < info->num_inputs; i++) {
-        switch (info->input_semantic_name[i]) {
-            case TGSI_SEMANTIC_COLOR:
-                tab[i] = INTERP_LINEAR;
-                cols++;
-                break;
-            case TGSI_SEMANTIC_POSITION:
-            case TGSI_SEMANTIC_PSIZE:
-                debug_printf("r300: Implementation error: Can't use "
-                        "pos attribs in fragshader yet!\n");
-                /* Pass through for now */
-            case TGSI_SEMANTIC_FOG:
-            case TGSI_SEMANTIC_GENERIC:
-                tab[i] = INTERP_PERSPECTIVE;
-                break;
-            default:
-                debug_printf("r300: Unknown vertex input %d\n",
-                    info->input_semantic_name[i]);
-                break;
-        }
+static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
+{
+    rs->inst[id] |= R300_RS_INST_TEX_CN_WRITE |
+                    R300_RS_INST_TEX_ADDR(fp_offset);
+}
+
+static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr,
+                        boolean swizzle_0001)
+{
+    rs->ip[id] |= R500_RS_COL_PTR(ptr);
+    if (swizzle_0001) {
+        rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
+    } else {
+        rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
     }
+    rs->inst[id] |= R500_RS_INST_COL_ID(id);
+}
 
-    /* Now that we know where everything is... */
-    DBG(r300, DBG_DRAW, "r300: fp input count: %d\n", info->num_inputs);
-    for (i = 0; i < info->num_inputs; i++) {
-        switch (tab[i]) {
-            case INTERP_LINEAR:
-                DBG(r300, DBG_DRAW, "r300: attrib: "
-                        "stack offset %d, color,    tab %d\n",
-                        i, cols_emitted);
-                tab[i] = cols_emitted;
-                cols_emitted++;
-                break;
-            case INTERP_PERSPECTIVE:
-                DBG(r300, DBG_DRAW, "r300: attrib: "
-                        "stack offset %d, texcoord, tab %d\n",
-                        i, cols + texs);
-                tab[i] = cols + texs;
-                texs++;
-                break;
-            case -1:
-                debug_printf("r300: Implementation error: Bad fp interp!\n");
-            default:
-                break;
-        }
+static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
+{
+    rs->inst[id] |= R500_RS_INST_COL_CN_WRITE |
+                    R500_RS_INST_COL_ADDR(fp_offset);
+}
+
+static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr,
+                        boolean swizzle_X001)
+{
+    int rs_tex_comp = ptr*4;
+
+    if (swizzle_X001) {
+        rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
+                      R500_RS_SEL_T(R500_RS_IP_PTR_K0) |
+                      R500_RS_SEL_R(R500_RS_IP_PTR_K0) |
+                      R500_RS_SEL_Q(R500_RS_IP_PTR_K1);
+    } else {
+        rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
+                      R500_RS_SEL_T(rs_tex_comp + 1) |
+                      R500_RS_SEL_R(rs_tex_comp + 2) |
+                      R500_RS_SEL_Q(rs_tex_comp + 3);
     }
+    rs->inst[id] |= R500_RS_INST_TEX_ID(id);
+}
 
+static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
+{
+    rs->inst[id] |= R500_RS_INST_TEX_CN_WRITE |
+                    R500_RS_INST_TEX_ADDR(fp_offset);
 }
 
-/* Set up the RS block. This is the part of the chipset that actually does
- * the rasterization of vertices into fragments. This is also the part of the
- * chipset that locks up if any part of it is even slightly wrong. */
+/* Set up the RS block.
+ *
+ * This is the part of the chipset that actually does the rasterization
+ * of vertices into fragments. This is also the part of the chipset that
+ * locks up if any part of it is even slightly wrong. */
 static void r300_update_rs_block(struct r300_context* r300,
-                                 struct r300_rs_block* rs)
+                                 struct r300_shader_semantics* vs_outputs,
+                                 struct r300_shader_semantics* fs_inputs)
 {
-    struct tgsi_shader_info* info = &r300->fs->info;
-    int col_count = 0, fp_offset = 0, i, tex_count = 0;
-    int rs_tex_comp = 0;
-
-    if (r300_screen(r300->context.screen)->caps->is_r500) {
-        for (i = 0; i < info->num_inputs; i++) {
-            switch (info->input_semantic_name[i]) {
-                case TGSI_SEMANTIC_COLOR:
-                    rs->ip[col_count] |=
-                        R500_RS_COL_PTR(col_count) |
-                        R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
-                    col_count++;
-                    break;
-                case TGSI_SEMANTIC_GENERIC:
-                    rs->ip[tex_count] |=
-                        R500_RS_SEL_S(rs_tex_comp) |
-                        R500_RS_SEL_T(rs_tex_comp + 1) |
-                        R500_RS_SEL_R(rs_tex_comp + 2) |
-                        R500_RS_SEL_Q(rs_tex_comp + 3);
-                    tex_count++;
-                    rs_tex_comp += 4;
-                    break;
-                default:
-                    break;
-            }
-        }
+    struct r300_rs_block rs = { { 0 } };
+    int i, col_count = 0, tex_count = 0, fp_offset = 0, count;
+    void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean);
+    void (*rX00_rs_col_write)(struct r300_rs_block*, int, int);
+    void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean);
+    void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int);
+    boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
+                              vs_outputs->bcolor[1] != ATTR_UNUSED;
+
+    if (r300->screen->caps.is_r500) {
+        rX00_rs_col       = r500_rs_col;
+        rX00_rs_col_write = r500_rs_col_write;
+        rX00_rs_tex       = r500_rs_tex;
+        rX00_rs_tex_write = r500_rs_tex_write;
+    } else {
+        rX00_rs_col       = r300_rs_col;
+        rX00_rs_col_write = r300_rs_col_write;
+        rX00_rs_tex       = r300_rs_tex;
+        rX00_rs_tex_write = r300_rs_tex_write;
+    }
 
-        /* Rasterize at least one color, or bad things happen. */
-        if ((col_count == 0) && (tex_count == 0)) {
-            rs->ip[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
+    /* Rasterize colors. */
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used ||
+            vs_outputs->color[1] != ATTR_UNUSED) {
+            /* Always rasterize if it's written by the VS,
+             * otherwise it locks up. */
+            rX00_rs_col(&rs, col_count, i, FALSE);
+
+            /* Write it to the FS input register if it's used by the FS. */
+            if (fs_inputs->color[i] != ATTR_UNUSED) {
+                rX00_rs_col_write(&rs, col_count, fp_offset);
+                fp_offset++;
+            }
             col_count++;
+        } else {
+            /* Skip the FS input register, leave it uninitialized. */
+            /* If we try to set it to (0,0,0,1), it will lock up. */
+            if (fs_inputs->color[i] != ATTR_UNUSED) {
+                fp_offset++;
+            }
         }
+    }
 
-        for (i = 0; i < tex_count; i++) {
-            rs->inst[i] |= R500_RS_INST_TEX_ID(i) |
-                R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_offset);
-            fp_offset++;
+    /* Rasterize texture coordinates. */
+    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+        if (vs_outputs->generic[i] != ATTR_UNUSED) {
+            /* Always rasterize if it's written by the VS,
+             * otherwise it locks up. */
+            rX00_rs_tex(&rs, tex_count, tex_count, FALSE);
+
+            /* Write it to the FS input register if it's used by the FS. */
+            if (fs_inputs->generic[i] != ATTR_UNUSED) {
+                rX00_rs_tex_write(&rs, tex_count, fp_offset);
+                fp_offset++;
+            }
+            tex_count++;
+        } else {
+            /* Skip the FS input register, leave it uninitialized. */
+            /* If we try to set it to (0,0,0,1), it will lock up. */
+            if (fs_inputs->generic[i] != ATTR_UNUSED) {
+                fp_offset++;
+            }
         }
+    }
+
+    /* Rasterize fog coordinates. */
+    if (vs_outputs->fog != ATTR_UNUSED) {
+        /* Always rasterize if it's written by the VS,
+         * otherwise it locks up. */
+        rX00_rs_tex(&rs, tex_count, tex_count, TRUE);
 
-        for (i = 0; i < col_count; i++) {
-            rs->inst[i] |= R500_RS_INST_COL_ID(i) |
-                R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_offset);
+        /* Write it to the FS input register if it's used by the FS. */
+        if (fs_inputs->fog != ATTR_UNUSED) {
+            rX00_rs_tex_write(&rs, tex_count, fp_offset);
             fp_offset++;
         }
+        tex_count++;
     } else {
-        for (i = 0; i < info->num_inputs; i++) {
-            switch (info->input_semantic_name[i]) {
-                case TGSI_SEMANTIC_COLOR:
-                    rs->ip[col_count] |=
-                        R300_RS_COL_PTR(col_count) |
-                        R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
-                    col_count++;
-                    break;
-                case TGSI_SEMANTIC_GENERIC:
-                    rs->ip[tex_count] |=
-                        R300_RS_TEX_PTR(rs_tex_comp) |
-                        R300_RS_SEL_S(R300_RS_SEL_C0) |
-                        R300_RS_SEL_T(R300_RS_SEL_C1) |
-                        R300_RS_SEL_R(R300_RS_SEL_C2) |
-                        R300_RS_SEL_Q(R300_RS_SEL_C3);
-                    tex_count++;
-                    rs_tex_comp+=4;
-                    break;
-                default:
-                    break;
-            }
-        }
-
-        if (col_count == 0) {
-            rs->ip[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
-        }
-
-        if (tex_count == 0) {
-            rs->ip[0] |=
-                R300_RS_SEL_S(R300_RS_SEL_K0) |
-                R300_RS_SEL_T(R300_RS_SEL_K0) |
-                R300_RS_SEL_R(R300_RS_SEL_K0) |
-                R300_RS_SEL_Q(R300_RS_SEL_K1);
+        /* Skip the FS input register, leave it uninitialized. */
+        /* If we try to set it to (0,0,0,1), it will lock up. */
+        if (fs_inputs->fog != ATTR_UNUSED) {
+            fp_offset++;
         }
+    }
 
-        /* Rasterize at least one color, or bad things happen. */
-        if ((col_count == 0) && (tex_count == 0)) {
-            col_count++;
-        }
+    /* Rasterize WPOS. */
+    /* If the FS doesn't need it, it's not written by the VS. */
+    if (fs_inputs->wpos != ATTR_UNUSED) {
+        rX00_rs_tex(&rs, tex_count, tex_count, FALSE);
+        rX00_rs_tex_write(&rs, tex_count, fp_offset);
 
-        for (i = 0; i < tex_count; i++) {
-            rs->inst[i] |= R300_RS_INST_TEX_ID(i) |
-                R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_offset);
-            fp_offset++;
-        }
+        fp_offset++;
+        tex_count++;
+    }
 
-        for (i = 0; i < col_count; i++) {
-            rs->inst[i] |= R300_RS_INST_COL_ID(i) |
-                R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset);
-            fp_offset++;
-        }
+    /* Rasterize at least one color, or bad things happen. */
+    if (col_count == 0 && tex_count == 0) {
+        rX00_rs_col(&rs, 0, 0, TRUE);
+        col_count++;
     }
 
-    rs->count = (rs_tex_comp) | (col_count << R300_IC_COUNT_SHIFT) |
+    rs.count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) |
         R300_HIRES_EN;
 
-    rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0);
+    count = MAX3(col_count, tex_count, 1);
+    rs.inst_count = count - 1;
+
+    /* Now, after all that, see if we actually need to update the state. */
+    if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) {
+        memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block));
+        r300->rs_block_state.size = 5 + count*2;
+    }
 }
 
-/* Update the vertex format. */
+/* Update the shader-dependant states. */
 static void r300_update_derived_shader_state(struct r300_context* r300)
 {
-    struct r300_screen* r300screen = r300_screen(r300->context.screen);
-    struct r300_vertex_info* vformat;
-    struct r300_rs_block* rs_block;
-    int i;
-
-    /*
-    struct r300_shader_key* key;
-    struct r300_shader_derived_value* value;
-    key = CALLOC_STRUCT(r300_shader_key);
-    key->vs = r300->vs;
-    key->fs = r300->fs;
-
-    value = (struct r300_shader_derived_value*)
-        util_hash_table_get(r300->shader_hash_table, (void*)key);
-    if (value) {
-        //vformat = value->vformat;
-        rs_block = value->rs_block;
-
-        FREE(key);
-    } else {
-        rs_block = CALLOC_STRUCT(r300_rs_block);
-        value = CALLOC_STRUCT(r300_shader_derived_value);
-
-        r300_update_rs_block(r300, rs_block);
-
-        //value->vformat = vformat;
-        value->rs_block = rs_block;
-        util_hash_table_set(r300->shader_hash_table,
-            (void*)key, (void*)value);
-    } */
-
-    /* XXX This will be refactored ASAP. */
-    vformat = CALLOC_STRUCT(r300_vertex_info);
-    rs_block = CALLOC_STRUCT(r300_rs_block);
+    struct r300_vertex_shader* vs = r300->vs_state.state;
 
-    for (i = 0; i < 16; i++) {
-        vformat->vs_tab[i] = -1;
-        vformat->fs_tab[i] = -1;
-    }
-
-    r300_vs_tab_routes(r300, vformat);
-    r300_vertex_psc(r300, vformat);
-    r300_update_fs_tab(r300, vformat);
+    r300_update_rs_block(r300, &vs->outputs, &r300->fs->inputs);
+}
 
-    r300_update_rs_block(r300, rs_block);
+static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa)
+{
+    /* We are interested only in the cases when a new depth or stencil value
+     * can be written and changed. */
+
+    /* We might optionally check for [Z func: never] and inspect the stencil
+     * state in a similar fashion, but it's not terribly important. */
+    return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) ||
+           (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) ||
+           ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) &&
+            (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK));
+}
 
-    FREE(r300->vertex_info);
-    FREE(r300->rs_block);
+static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa)
+{
+    /* We are interested only in the cases when alpha testing can kill
+     * a fragment. */
+    uint32_t af = dsa->alpha_function;
 
-    r300->vertex_info = vformat;
-    r300->rs_block = rs_block;
-    r300->dirty_state |= (R300_NEW_VERTEX_FORMAT | R300_NEW_RS_BLOCK);
+    return (af & R300_FG_ALPHA_FUNC_ENABLE) &&
+           (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS;
 }
 
 static void r300_update_ztop(struct r300_context* r300)
 {
-    r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE;
+    struct r300_ztop_state* ztop_state =
+        (struct r300_ztop_state*)r300->ztop_state.state;
 
     /* This is important enough that I felt it warranted a comment.
      *
@@ -534,33 +419,117 @@ static void r300_update_ztop(struct r300_context* r300)
      * The docs claim that for the first three cases, if no ZS writes happen,
      * then ZTOP can be used.
      *
+     * (3) will never apply since we do not support chroma-keyed operations.
+     * (4) will need to be re-examined (and this comment updated) if/when
+     * Hyper-Z becomes supported.
+     *
      * Additionally, the following conditions require disabled ZTOP:
-     * ~) Depth writes in fragment shader
-     * ~) Outstanding occlusion queries
+     * 5) Depth writes in fragment shader
+     * 6) Outstanding occlusion queries
+     *
+     * This register causes stalls all the way from SC to CB when changed,
+     * but it is buffered on-chip so it does not hurt to write it if it has
+     * not changed.
      *
      * ~C.
      */
-    if (r300->dsa_state->alpha_function) {
-        r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
-    } else if (r300->fs->info.uses_kill) {
-        r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
-    } else if (r300_fragment_shader_writes_depth(r300->fs)) {
-        r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
-    } else if (r300->query_current) {
-        r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
+
+    /* ZS writes */
+    if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) &&
+           (r300_dsa_alpha_test_enabled(r300->dsa_state.state) ||/* (1) */
+            r300->fs->info.uses_kill)) {                         /* (2) */
+        ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
+    } else if (r300_fragment_shader_writes_depth(r300->fs)) {    /* (5) */
+        ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
+    } else if (r300->query_current) {                            /* (6) */
+        ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
+    } else {
+        ztop_state->z_buffer_top = R300_ZTOP_ENABLE;
+    }
+
+    r300->ztop_state.dirty = TRUE;
+}
+
+static void r300_merge_textures_and_samplers(struct r300_context* r300)
+{
+    struct r300_textures_state *state =
+        (struct r300_textures_state*)r300->textures_state.state;
+    struct r300_texture_sampler_state *texstate;
+    struct r300_sampler_state *sampler;
+    struct pipe_sampler_view *view;
+    struct r300_texture *tex;
+    unsigned min_level, max_level, i, size;
+    unsigned count = MIN2(state->texture_count, state->sampler_count);
+
+    state->tx_enable = 0;
+    state->count = 0;
+    size = 2;
+
+    for (i = 0; i < count; i++) {
+        if (state->fragment_sampler_views[i] && state->sampler_states[i]) {
+            state->tx_enable |= 1 << i;
+
+            view = state->fragment_sampler_views[i];
+            tex = r300_texture(view->texture);
+            sampler = state->sampler_states[i];
+
+            assert(view->format == tex->b.b.format);
+
+            texstate = &state->regs[i];
+            memcpy(texstate->format, &tex->state, sizeof(uint32_t)*3);
+            texstate->filter[0] = sampler->filter0;
+            texstate->filter[1] = sampler->filter1;
+            texstate->border_color = sampler->border_color;
+            texstate->tile_config = R300_TXO_MACRO_TILE(tex->macrotile) |
+                                    R300_TXO_MICRO_TILE(tex->microtile);
+
+            /* to emulate 1D textures through 2D ones correctly */
+            if (tex->b.b.target == PIPE_TEXTURE_1D) {
+                texstate->filter[0] &= ~R300_TX_WRAP_T_MASK;
+                texstate->filter[0] |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
+            }
+
+            if (tex->uses_pitch) {
+                /* NPOT textures don't support mip filter, unfortunately.
+                 * This prevents incorrect rendering. */
+                texstate->filter[0] &= ~R300_TX_MIN_FILTER_MIP_MASK;
+            } else {
+                /* determine min/max levels */
+                /* the MAX_MIP level is the largest (finest) one */
+                max_level = MIN3(sampler->max_lod + view->first_level,
+                                 tex->b.b.last_level, view->last_level);
+                min_level = MIN2(sampler->min_lod + view->first_level,
+                                 max_level);
+                texstate->format[0] |= R300_TX_NUM_LEVELS(max_level);
+                texstate->filter[0] |= R300_TX_MAX_MIP_LEVEL(min_level);
+            }
+
+            texstate->filter[0] |= i << 28;
+
+            size += 16;
+            state->count = i+1;
+        }
     }
+
+    r300->textures_state.size = size;
 }
 
 void r300_update_derived_state(struct r300_context* r300)
 {
-    /* XXX */
-    if (TRUE || r300->dirty_state &
-        (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) {
+    if (r300->rs_block_state.dirty) {
         r300_update_derived_shader_state(r300);
     }
 
-    if (r300->dirty_state &
-            (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) {
-        r300_update_ztop(r300);
+    if (r300->textures_state.dirty) {
+        r300_merge_textures_and_samplers(r300);
     }
+
+    if (r300->draw) {
+        memset(&r300->vertex_info, 0, sizeof(struct vertex_info));
+        r300_draw_emit_all_attribs(r300);
+        draw_compute_vertex_size(&r300->vertex_info);
+        r300_swtcl_vertex_psc(r300);
+    }
+
+    r300_update_ztop(r300);
 }