* Major endianness fixes on sky code today. The milestone sample and existing
author Frank Ch. Eigler <fche@redhat.com>
Fri, 27 Feb 1998 21:52:40 +0000 (21:52 +0000)
committer Frank Ch. Eigler <fche@redhat.com>
Fri, 27 Feb 1998 21:52:40 +0000 (21:52 +0000)
  PKE tests run identically on SPARC/Solaris and x86/Linux.

* sky-pke.c (pke_io_{read,write}_buffer): Endianness fixes aka
  "E-fixes" in register and FIFO read/writes.
(pke_code_{pkemscalf,pkemscal}): E-fixes in VU CIA setting.
(pke_code_{mpg,unpack}): E-fixes in VU memory & tracking updates.
(pke_code_direct): E-fixes in GPUIF FIFO stuffing.

* sky-pke.h (PKE_MEM_WRITE): E-fixes in trace file writing.

* sky-vu0.c (vu0_attach): Allocate micro/data memory with zalloc
  to guarantee sufficient (16-byte) alignment.

* sky-vu1.c (vu1_attach): Ditto.
(vu1_io_read_register_window): *PARTIAL* E-fixes in register accesses.

* sky-libvpe.c (gif_write): E-fixes in GPUIF FIFO stuffing.

* sky-gpuif.c (gif_io_{read,write}_buffer): E-fixes in
  register and FIFO read/writes.

* sky-dma.c (do_dma_transfer_tag): E-fixes in tag reading.

sim/mips/sky-pke.c
sim/mips/sky-pke.h
sim/mips/sky-vu0.c
sim/mips/sky-vu1.c

index 6685d16b6f0f845c5c220dd3bedbf9be2327e12e..f2dbb988260b7a72fa7593c211830da5d5619412 100644 (file)
@@ -253,7 +253,7 @@ pke_io_read_buffer(device *me_,
        case PKE_REG_C1:
        case PKE_REG_C2:
        case PKE_REG_C3:
-         result[0] = me->regs[reg_num][0];
+         result[0] = H2T_4(me->regs[reg_num][0]);
          break;
 
          /* handle common case of write-only registers */
@@ -337,6 +337,10 @@ pke_io_write_buffer(device *me_,
       /* write user-given bytes into input */
       memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes);
 
+      /* make words host-endian */
+      input[0] = T2H_4(input[0]);
+      /* we may ignore other words */
+
       /* handle writes to individual registers; clear `writeable' on error */
       switch(reg_num)
        {
@@ -443,7 +447,7 @@ pke_io_write_buffer(device *me_,
       unsigned_4 dma_tag_present = 0;
       int i;
 
-      /* collect potentially-partial quadword in write buffer */
+      /* collect potentially-partial quadword in write buffer; LE byte order */
       memcpy(((unsigned_1*)& me->fifo_qw_in_progress) + fifo_byte, src, nr_bytes);
       /* mark bytes written */
       for(i = fifo_byte; i < fifo_byte + nr_bytes; i++)
@@ -475,16 +479,18 @@ pke_io_write_buffer(device *me_,
          me->fifo_buffer_size = new_fifo_buffer_size;
        }
 
-      /* add new quadword at end of FIFO */
+      /* add new quadword at end of FIFO; store data in host-endian */
       fqw = & me->fifo[me->fifo_num_elements];
       fqw->word_class[0] = fqw->word_class[1] = 
        fqw->word_class[2] = fqw->word_class[3] = wc_unknown;
-      memcpy((void*) fqw->data, me->fifo_qw_in_progress, sizeof(quadword));
+      fqw->data[0] = T2H_4(me->fifo_qw_in_progress[0]);
+      fqw->data[1] = T2H_4(me->fifo_qw_in_progress[1]); 
+      fqw->data[2] = T2H_4(me->fifo_qw_in_progress[2]); 
+      fqw->data[3] = T2H_4(me->fifo_qw_in_progress[3]); 
       ASSERT(sizeof(unsigned_4) == 4);
       PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_MADR : DMA_D1_MADR),
-                  & fqw->source_address, /* target endian */
+                  & fqw->source_address, /* converted to host-endian */
                   4);
-      fqw->source_address = T2H_4(fqw->source_address);
       PKE_MEM_READ(me, (me->pke_number == 0 ? DMA_D0_PKTFLAG : DMA_D1_PKTFLAG),
                   & dma_tag_present,
                   4);
@@ -1250,8 +1256,9 @@ pke_code_pkemscal(struct pke_device* me, unsigned_4 pkecode)
       if(me->pke_number == 1)
        pke_flip_dbf(me);
 
-      /* compute new PC for VU */
+      /* compute new PC for VU (host byte-order) */
       vu_pc = BIT_MASK_GET(imm, 0, 15);
+      vu_pc = T2H_4(vu_pc);
 
       /* write new PC; callback function gets VU running */
       ASSERT(sizeof(unsigned_4) == 4);
@@ -1370,8 +1377,9 @@ pke_code_pkemscalf(struct pke_device* me, unsigned_4 pkecode)
       if(me->pke_number == 1)
        pke_flip_dbf(me);
 
-      /* compute new PC for VU */
+      /* compute new PC for VU (host byte-order) */
       vu_pc = BIT_MASK_GET(imm, 0, 15);
+      vu_pc = T2H_4(vu_pc);
 
       /* rewrite new PC; callback function gets VU running */
       ASSERT(sizeof(unsigned_4) == 4);
@@ -1573,7 +1581,6 @@ pke_code_mpg(struct pke_device* me, unsigned_4 pkecode)
              address_word vu_addr_max_size;
              unsigned_4 vu_lower_opcode, vu_upper_opcode;
              unsigned_4* operand;
-             unsigned_4 source_addr;
              struct fifo_quadword* fq;
              int next_num;
 
@@ -1605,21 +1612,20 @@ pke_code_mpg(struct pke_device* me, unsigned_4 pkecode)
              vu_upper_opcode = *pke_pc_operand(me, i*2 + 2);
              
              /* write data into VU memory */
-             /* lower (scalar) opcode comes in first word */
+             /* lower (scalar) opcode comes in first word ; macro performs H2T! */
              PKE_MEM_WRITE(me, vu_addr,
                            & vu_lower_opcode,
                            4);
-             /* upper (vector) opcode comes in second word */
+             /* upper (vector) opcode comes in second word ; H2T */
              ASSERT(sizeof(unsigned_4) == 4);
              PKE_MEM_WRITE(me, vu_addr + 4,
                            & vu_upper_opcode,
                            4);
              
              /* write tracking address in target byte-order */
-             source_addr = H2T_4(fq->source_address);
              ASSERT(sizeof(unsigned_4) == 4);
              PKE_MEM_WRITE(me, vutrack_addr,
-                           & source_addr,
+                           & fq->source_address,
                            4);
            } /* VU xfer loop */
 
@@ -1667,7 +1673,7 @@ pke_code_direct(struct pke_device* me, unsigned_4 pkecode)
     {
       /* VU idle */
       int i;
-      quadword fifo_data;
+      unsigned_16 fifo_data;
       
       /* "transferring" operand */
       PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
@@ -1678,14 +1684,14 @@ pke_code_direct(struct pke_device* me, unsigned_4 pkecode)
          unsigned_4* operand = pke_pc_operand(me, 1+i);
          
          /* collect word into quadword */
-         fifo_data[i % 4] = *operand;
+         *A4_16(&fifo_data, 3 - (i % 4)) = *operand;
          
          /* write to GPUIF FIFO only with full quadword */
          if(i % 4 == 3)
            {
              ASSERT(sizeof(fifo_data) == 16);
              PKE_MEM_WRITE(me, GIF_PATH2_FIFO_ADDR,
-                           fifo_data,
+                           fifo_data,
                            16);
            } /* write collected quadword */
          
@@ -1816,16 +1822,18 @@ pke_code_unpack(struct pke_device* me, unsigned_4 pkecode)
          /* compute address of tracking table entry */
          vutrack_addr = vutrack_addr_base + ((signed_8)vu_addr - (signed_8)vu_addr_base) / 4;
 
-         /* read old VU data word at address */
-         ASSERT(sizeof(vu_old_data) == 16);
-         PKE_MEM_READ(me, vu_addr,
-                      vu_old_data,
-                      16);
+         /* read old VU data word at address; reverse words if needed */
+         {
+           unsigned_16 vu_old_badwords;
+           ASSERT(sizeof(vu_old_badwords) == 16);
+           PKE_MEM_READ(me, vu_addr,
+                        &vu_old_badwords, 16);
+           vu_old_data[0] = * A4_16(& vu_old_badwords, 3);
+           vu_old_data[1] = * A4_16(& vu_old_badwords, 2);
+           vu_old_data[2] = * A4_16(& vu_old_badwords, 1);
+           vu_old_data[3] = * A4_16(& vu_old_badwords, 0);
+         }
 
-         /* yank memory out of little-endian order */
-         for(i=0; i<4; i++)
-           vu_old_data[i] = LE2H_4(vu_old_data[i]);
-         
          /* For cyclic unpack, next operand quadword may come from instruction stream
             or be zero. */
          if((num == 0 && cl == 0 && wl == 0) || /* shortcut clear */
@@ -1960,18 +1968,19 @@ pke_code_unpack(struct pke_device* me, unsigned_4 pkecode)
              ;
            }
 
-         /* yank memory into little-endian order */
-         for(i=0; i<4; i++)
-           vu_new_data[i] = H2LE_4(vu_new_data[i]);
-         
-         /* write replacement word */
-         ASSERT(sizeof(vu_new_data) == 16);
-         PKE_MEM_WRITE(me, vu_addr,
-                       vu_new_data,
-                       16);
-
-         /* write tracking address in target byte-order */
-         source_addr = H2T_4(source_addr);
+         /* write new VU data word at address; reverse words if needed */
+         {
+           unsigned_16 vu_new_badwords;
+           * A4_16(& vu_new_badwords, 3) = vu_new_data[0];
+           * A4_16(& vu_new_badwords, 2) = vu_new_data[1];
+           * A4_16(& vu_new_badwords, 1) = vu_new_data[2];
+           * A4_16(& vu_new_badwords, 0) = vu_new_data[3];
+           ASSERT(sizeof(vu_new_badwords) == 16);
+           PKE_MEM_WRITE(me, vu_addr,
+                        &vu_new_badwords, 16);
+         }
+
+         /* write tracking address */
          ASSERT(sizeof(unsigned_4) == 4);
          PKE_MEM_WRITE(me, vutrack_addr,
                        & source_addr,
index f8051c2b1e7e68c05da8dd6965f03952a72548d5..8244ee5380f1dfe67aefc928bedbd3e4b22247b0 100644 (file)
@@ -422,10 +422,12 @@ struct pke_device
          if((me)->fifo_trace_file != NULL) \
           { \
             int i; \
+            unsigned_##size value_te; \
+            value_te = H2T_##size(value); \
             fprintf((me)->fifo_trace_file, "# Write %2d bytes  to  ", size); \
             fprintf((me)->fifo_trace_file, "0x%08lx: ", (unsigned long)(addr)); \
             for(i=0; i<size; i++) \
-              fprintf((me)->fifo_trace_file, " %02x", ((unsigned_1*)(& value))[i]); \
+              fprintf((me)->fifo_trace_file, " %02x", ((unsigned_1*)(& value_te))[i]); \
             fprintf((me)->fifo_trace_file, "\n"); \
           } \
         } while(0)      
index 0d212fe1552aa0a3f99d923e12c07aa77a8d437f..ed52c1bc99e437bcdede4ca78837726d94b98b44 100644 (file)
@@ -7,8 +7,9 @@
 #include "sky-device.h"
 #include "sky-vu0.h"
 
-static char vu0_mem0_buffer[VU0_MEM0_SIZE];
-static char vu0_mem1_buffer[VU0_MEM1_SIZE];
+static char* vu0_mem0_buffer = 0;
+static char* vu0_mem1_buffer = 0;
+
 
 void 
 vu0_issue(void) 
@@ -62,6 +63,7 @@ vu0_attach(SIM_DESC sd)
                    &vu0_device,
                    NULL /*buffer*/);
 
+  vu0_mem0_buffer = zalloc(VU0_MEM0_SIZE);
   sim_core_attach (sd,
                   NULL,
                    0 /*level*/,
@@ -71,8 +73,9 @@ vu0_attach(SIM_DESC sd)
                    VU0_MEM0_SIZE /*nr_bytes*/,
                    0 /*modulo*/,
                    0 /*device*/,
-                   &vu0_mem0_buffer /*buffer*/);
+                   vu0_mem0_buffer /*buffer*/);
 
+  vu0_mem1_buffer = zalloc(VU0_MEM1_SIZE);
   sim_core_attach (sd,
                   NULL,
                    0 /*level*/,
@@ -82,5 +85,5 @@ vu0_attach(SIM_DESC sd)
                    VU0_MEM1_SIZE /*nr_bytes*/,
                    0 /*modulo*/,
                    0 /*device*/,
-                   &vu0_mem1_buffer /*buffer*/);
+                   vu0_mem1_buffer /*buffer*/);
 }
index b1b853c3a85097adcee296a03174a839a75d725d..40ddb034d641d50d621ccb67f142ea1824ace12d 100644 (file)
@@ -17,8 +17,8 @@ VectorUnitState vu1_state;
 
 #define sim_warning printf
 
-static char vu1_umem_buffer[VU1_MEM0_SIZE] __attribute__ ((aligned(16)));
-static char vu1_mem_buffer[VU1_MEM1_SIZE]  __attribute__ ((aligned(16)));
+static char* vu1_umem_buffer = 0;
+static char* vu1_mem_buffer = 0;
 
 void init_vu1(void);
 void init_vu(VectorUnitState *state, char* umem_buffer, char* mem_buffer);
@@ -78,10 +78,11 @@ vu1_io_read_register_window(device *me,
            if (vu1_state.runState == VU_RUN || vu1_state.runState == VU_BREAK)
                SET_BIT(stat, VPU_STAT_VBS1_BIT);
            
-           *(u_long*)&source_buffer[VPE1_STAT - VU1_REGISTER_WINDOW_START] = stat;
+           *(u_long*)&source_buffer[VPE1_STAT - VU1_REGISTER_WINDOW_START] = H2T_4(stat);
        }
 
-       *(u_long*)&source_buffer[VU1_CIA  - VU1_REGISTER_WINDOW_START] = vu1_state.junk._vpepc;
+       *(u_long*)&source_buffer[VU1_CIA  - VU1_REGISTER_WINDOW_START] = H2T_4(vu1_state.junk._vpepc);
+       /* XXX: other H2T_N's needed around here. */
 
 #if 0
        printf("%s: Read: %x, %d, dest: %x, space: %d, %x!\n", me->name, (int)addr, nr_bytes, (int)dest, space, *(int*)&(vu1_state.regs.VPE_STAT));
@@ -167,6 +168,7 @@ vu1_init(SIM_DESC sd)
                    &vu1_device,
                    NULL /*buffer*/);
 
+  vu1_umem_buffer = zalloc(VU1_MEM0_SIZE);
   sim_core_attach (sd,
                   NULL,
                    0 /*level*/,
@@ -176,8 +178,9 @@ vu1_init(SIM_DESC sd)
                    VU1_MEM0_SIZE /*nr_bytes*/,
                    0 /*modulo*/,
                    0 /*device*/,
-                   &vu1_umem_buffer /*buffer*/);
+                   vu1_umem_buffer /*buffer*/);
 
+  vu1_mem_buffer = zalloc(VU1_MEM1_SIZE);
   sim_core_attach (sd,
                   NULL,
                    0 /*level*/,
@@ -187,7 +190,7 @@ vu1_init(SIM_DESC sd)
                    VU1_MEM1_SIZE /*nr_bytes*/,
                    0 /*modulo*/,
                    0 /*device*/,
-                   &vu1_mem_buffer /*buffer*/);
+                   vu1_mem_buffer /*buffer*/);
 
   init_vu1();
   /*initvpe();*/