PCem

changeset 110:036dc3a418ac

Initial S3 ViRGE video overlay emulation.
author TomW
date Fri Jun 13 18:43:25 2014 +0100
parents 26c6305371cd
children 912e602b3406
files src/vid_ati18800.c src/vid_ati28800.c src/vid_ati_mach64.c src/vid_cl5429.c src/vid_et4000.c src/vid_et4000w32.c src/vid_oti067.c src/vid_paradise.c src/vid_s3.c src/vid_s3_virge.c src/vid_svga.c src/vid_svga.h src/vid_tgui9440.c src/vid_tvga.c src/vid_vga.c
diffstat 15 files changed, 381 insertions(+), 28 deletions(-) [+]
line diff
     1.1 --- a/src/vid_ati18800.c	Mon Jun 09 18:13:57 2014 +0100
     1.2 +++ b/src/vid_ati18800.c	Fri Jun 13 18:43:25 2014 +0100
     1.3 @@ -130,6 +130,7 @@
     1.4          svga_init(&ati18800->svga, ati18800, 1 << 19, /*512kb*/
     1.5                     NULL,
     1.6                     ati18800_in, ati18800_out,
     1.7 +                   NULL,
     1.8                     NULL);
     1.9  
    1.10          io_sethandler(0x01ce, 0x0002, ati18800_in, NULL, NULL, ati18800_out, NULL, NULL, ati18800);
     2.1 --- a/src/vid_ati28800.c	Mon Jun 09 18:13:57 2014 +0100
     2.2 +++ b/src/vid_ati28800.c	Fri Jun 13 18:43:25 2014 +0100
     2.3 @@ -151,6 +151,7 @@
     2.4          svga_init(&ati28800->svga, ati28800, 1 << 19, /*512kb*/
     2.5                     ati28800_recalctimings,
     2.6                     ati28800_in, ati28800_out,
     2.7 +                   NULL,
     2.8                     NULL);
     2.9  
    2.10          io_sethandler(0x01ce, 0x0002, ati28800_in, NULL, NULL, ati28800_out, NULL, NULL, ati28800);
     3.1 --- a/src/vid_ati_mach64.c	Mon Jun 09 18:13:57 2014 +0100
     3.2 +++ b/src/vid_ati_mach64.c	Fri Jun 13 18:43:25 2014 +0100
     3.3 @@ -2204,7 +2204,8 @@
     3.4          svga_init(&mach64->svga, mach64, 1 << 22, /*4mb*/
     3.5                     mach64_recalctimings,
     3.6                     mach64_in, mach64_out,
     3.7 -                   mach64_hwcursor_draw); 
     3.8 +                   mach64_hwcursor_draw,
     3.9 +                   NULL);
    3.10  
    3.11          rom_init(&mach64->bios_rom, "roms/mach64gx/bios.bin", 0xc0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL);
    3.12          if (PCI)
     4.1 --- a/src/vid_cl5429.c	Mon Jun 09 18:13:57 2014 +0100
     4.2 +++ b/src/vid_cl5429.c	Fri Jun 13 18:43:25 2014 +0100
     4.3 @@ -824,7 +824,8 @@
     4.4          svga_init(&gd5429->svga, gd5429, 1 << 21, /*2mb*/
     4.5                     gd5429_recalctimings,
     4.6                     gd5429_in, gd5429_out,
     4.7 -                   gd5429_hwcursor_draw);
     4.8 +                   gd5429_hwcursor_draw,
     4.9 +                   NULL);
    4.10  
    4.11          mem_mapping_set_handler(&gd5429->svga.mapping, gd5429_read, NULL, NULL, gd5429_write, NULL, NULL);
    4.12          mem_mapping_set_p(&gd5429->svga.mapping, gd5429);
     5.1 --- a/src/vid_et4000.c	Mon Jun 09 18:13:57 2014 +0100
     5.2 +++ b/src/vid_et4000.c	Fri Jun 13 18:43:25 2014 +0100
     5.3 @@ -140,6 +140,7 @@
     5.4          svga_init(&et4000->svga, et4000, 1 << 20, /*1mb*/
     5.5                     et4000_recalctimings,
     5.6                     et4000_in, et4000_out,
     5.7 +                   NULL,
     5.8                     NULL);
     5.9          
    5.10          return et4000;
     6.1 --- a/src/vid_et4000w32.c	Mon Jun 09 18:13:57 2014 +0100
     6.2 +++ b/src/vid_et4000w32.c	Fri Jun 13 18:43:25 2014 +0100
     6.3 @@ -1016,7 +1016,8 @@
     6.4          svga_init(&et4000->svga, et4000, 1 << 21, /*2mb*/
     6.5                     et4000w32p_recalctimings,
     6.6                     et4000w32p_in, et4000w32p_out,
     6.7 -                   et4000w32p_hwcursor_draw); 
     6.8 +                   et4000w32p_hwcursor_draw,
     6.9 +                   NULL); 
    6.10  
    6.11          rom_init(&et4000->bios_rom, "roms/et4000w32.bin", 0xc0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL);
    6.12          if (PCI)
     7.1 --- a/src/vid_oti067.c	Mon Jun 09 18:13:57 2014 +0100
     7.2 +++ b/src/vid_oti067.c	Fri Jun 13 18:43:25 2014 +0100
     7.3 @@ -123,6 +123,7 @@
     7.4          svga_init(&oti067->svga, oti067, 1 << 19, /*512kb*/
     7.5                     oti067_recalctimings,
     7.6                     oti067_in, oti067_out,
     7.7 +                   NULL,
     7.8                     NULL);
     7.9  
    7.10          io_sethandler(0x03c0, 0x0020, oti067_in, NULL, NULL, oti067_out, NULL, NULL, oti067);
     8.1 --- a/src/vid_paradise.c	Mon Jun 09 18:13:57 2014 +0100
     8.2 +++ b/src/vid_paradise.c	Fri Jun 13 18:43:25 2014 +0100
     8.3 @@ -264,6 +264,7 @@
     8.4          svga_init(&paradise->svga, paradise, 1 << 18, /*256kb*/
     8.5                     NULL,
     8.6                     paradise_in, paradise_out,
     8.7 +                   NULL,
     8.8                     NULL);
     8.9  
    8.10          mem_mapping_set_handler(&paradise->svga.mapping, paradise_read, NULL, NULL, paradise_write, NULL, NULL);
    8.11 @@ -294,6 +295,7 @@
    8.12          svga_init(&paradise->svga, paradise, 1 << 19, /*512kb*/
    8.13                     paradise_recalctimings,
    8.14                     paradise_in, paradise_out,
    8.15 +                   NULL,
    8.16                     NULL);
    8.17  
    8.18          mem_mapping_set_handler(&paradise->svga.mapping, paradise_read, NULL, NULL, paradise_write, NULL, NULL);
     9.1 --- a/src/vid_s3.c	Mon Jun 09 18:13:57 2014 +0100
     9.2 +++ b/src/vid_s3.c	Fri Jun 13 18:43:25 2014 +0100
     9.3 @@ -1881,7 +1881,8 @@
     9.4          svga_init(&s3->svga, s3, 1 << 22, /*4mb - 864 supports 8mb but buggy VESA driver reports 0mb*/
     9.5                     s3_recalctimings,
     9.6                     s3_in, s3_out,
     9.7 -                   s3_hwcursor_draw);
     9.8 +                   s3_hwcursor_draw,
     9.9 +                   NULL);
    9.10  
    9.11          svga->crtc[0x36] = 1 | (2 << 2) | (1 << 4) | (4 << 5);
    9.12          svga->crtc[0x37] = 1 | (7 << 5);
    10.1 --- a/src/vid_s3_virge.c	Mon Jun 09 18:13:57 2014 +0100
    10.2 +++ b/src/vid_s3_virge.c	Fri Jun 13 18:43:25 2014 +0100
    10.3 @@ -123,15 +123,20 @@
    10.4                  uint32_t sec_fb0, sec_fb1;
    10.5                  uint32_t sec_stride;
    10.6                  uint32_t overlay_ctrl;
    10.7 -                uint32_t k1_vert_scale;
    10.8 -                uint32_t k2_vert_scale;
    10.9 -                uint32_t dda_vert_accumulator;
   10.10 +                 int32_t k1_vert_scale;
   10.11 +                 int32_t k2_vert_scale;
   10.12 +                 int32_t dda_vert_accumulator;
   10.13 +                 int32_t k1_horiz_scale;
   10.14 +                 int32_t k2_horiz_scale;
   10.15 +                 int32_t dda_horiz_accumulator;
   10.16                  uint32_t fifo_ctrl;
   10.17                  uint32_t pri_start;
   10.18                  uint32_t pri_size;
   10.19                  uint32_t sec_start;
   10.20                  uint32_t sec_size;
   10.21                  
   10.22 +                int sdif;
   10.23 +                
   10.24                  int pri_x, pri_y, pri_w, pri_h;
   10.25                  int sec_x, sec_y, sec_w, sec_h;
   10.26          } streams;
   10.27 @@ -224,14 +229,16 @@
   10.28                  //return;
   10.29  
   10.30                  case 0x3d4:
   10.31 -                svga->crtcreg = val & 0x7f;
   10.32 +                svga->crtcreg = val;// & 0x7f;
   10.33                  return;
   10.34                  case 0x3d5:
   10.35 -//                pclog("Write CRTC R%02X %02X\n", svga->crtcreg, val);
   10.36 +                //pclog("Write CRTC R%02X %02X  %04x(%08x):%08x\n", svga->crtcreg, val, CS, cs, pc);
   10.37                  if (svga->crtcreg <= 7 && svga->crtc[0x11] & 0x80) 
   10.38                          return;
   10.39                  if (svga->crtcreg >= 0x20 && svga->crtcreg != 0x38 && (svga->crtc[0x38] & 0xcc) != 0x48) 
   10.40                          return;
   10.41 +                if (svga->crtcreg >= 0x80)
   10.42 +                        return;
   10.43                  old = svga->crtc[svga->crtcreg];
   10.44                  svga->crtc[svga->crtcreg] = val;
   10.45                  switch (svga->crtcreg)
   10.46 @@ -350,7 +357,7 @@
   10.47                  ret = svga->crtcreg;
   10.48                  break;
   10.49                  case 0x3D5:
   10.50 -//                pclog("Read CRTC R%02X %04X:%04X (%02x)\n", svga->crtcreg, CS, pc, svga->crtc[svga->crtcreg]);
   10.51 +                //pclog("Read CRTC R%02X %04X:%04X (%02x)\n", svga->crtcreg, CS, pc, svga->crtc[svga->crtcreg]);
   10.52                  switch (svga->crtcreg)
   10.53                  {
   10.54                          case 0x2d: ret = virge->virge_id_high; break; /*Extended chip ID*/
   10.55 @@ -391,7 +398,7 @@
   10.56          if ((svga->crtc[0x67] & 0xc) != 0xc) /*VGA mode*/
   10.57          {
   10.58                  svga->ma_latch |= (virge->ma_ext << 16);
   10.59 -pclog("VGA mode\n");
   10.60 +//pclog("VGA mode\n");
   10.61                  if (svga->crtc[0x51] & 0x30)      svga->rowoffset += (svga->crtc[0x51] & 0x30) << 4;
   10.62                  else if (svga->crtc[0x43] & 0x04) svga->rowoffset += 0x100;
   10.63                  if (!svga->rowoffset) svga->rowoffset = 256;
   10.64 @@ -428,6 +435,7 @@
   10.65                  {
   10.66                          svga->rowoffset = (svga->rowoffset * 3) / 4; /*Hack*/
   10.67                  }
   10.68 +//pclog("VGA mode   x_disp=%i dispend=%i vtotal=%i\n", svga->hdisp, svga->dispend, svga->vtotal);
   10.69          }
   10.70          else /*Streams mode*/
   10.71          {
   10.72 @@ -436,8 +444,21 @@
   10.73                  else
   10.74                          svga->ma_latch = virge->streams.pri_fb0 >> 2;
   10.75                          
   10.76 -                svga->hdisp = virge->streams.pri_w;
   10.77 -pclog("Streams mode   x_disp=%i\n", svga->hdisp);                
   10.78 +                svga->hdisp = virge->streams.pri_w + 1;
   10.79 +                svga->dispend = virge->streams.pri_h;
   10.80 +                
   10.81 +                svga->overlay.x = virge->streams.sec_x - virge->streams.pri_x;
   10.82 +                svga->overlay.y = virge->streams.sec_y - virge->streams.pri_y;
   10.83 +                svga->overlay.ysize = virge->streams.sec_h;
   10.84 +
   10.85 +                if (virge->streams.buffer_ctrl & 2)
   10.86 +                        svga->overlay.addr = virge->streams.sec_fb1;
   10.87 +                else
   10.88 +                        svga->overlay.addr = virge->streams.sec_fb0;
   10.89 +
   10.90 +                svga->overlay.ena = (svga->overlay.x >= 0);
   10.91 +                svga->overlay.v_acc = virge->streams.dda_vert_accumulator;
   10.92 +//pclog("Streams mode   x_disp=%i dispend=%i vtotal=%i  x=%i y=%i ysize=%i\n", svga->hdisp, svga->dispend, svga->vtotal, svga->overlay.x, svga->overlay.y, svga->overlay.ysize);
   10.93                  svga->rowoffset = virge->streams.pri_stride >> 3;
   10.94  
   10.95                  switch ((virge->streams.pri_ctrl >> 24) & 0x7)
   10.96 @@ -778,7 +799,7 @@
   10.97          virge_t *virge = (virge_t *)p;
   10.98          svga_t *svga = &virge->svga;
   10.99          reg_writes++;
  10.100 -//        if ((addr & 0xfffc) >= 0x8000)
  10.101 +//        if ((addr & 0xfffc) >= 0x8000 && (addr & 0xfffc) < 0x8400)
  10.102  //                pclog("New MMIO writel %08X %08X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
  10.103  
  10.104          if ((addr & 0xfffc) < 0x8000)
  10.105 @@ -800,12 +821,22 @@
  10.106                  break;
  10.107                  case 0x8190:
  10.108                  virge->streams.sec_ctrl = val;
  10.109 +                virge->streams.dda_horiz_accumulator = val & 0xfff;
  10.110 +                if (val & (1 << 11))
  10.111 +                        virge->streams.dda_horiz_accumulator |= 0xfffff800;
  10.112 +                virge->streams.sdif = (val >> 24) & 7;
  10.113                  break;
  10.114                  case 0x8194:
  10.115                  virge->streams.chroma_upper_bound = val;
  10.116                  break;
  10.117                  case 0x8198:
  10.118                  virge->streams.sec_filter = val;
  10.119 +                virge->streams.k1_horiz_scale = val & 0x7ff;
  10.120 +                if (val & (1 << 10))
  10.121 +                        virge->streams.k1_horiz_scale |= 0xfffff800;
  10.122 +                virge->streams.k2_horiz_scale = (val >> 16) & 0x7ff;
  10.123 +                if ((val >> 16) & (1 << 10))
  10.124 +                        virge->streams.k2_horiz_scale |= 0xfffff800;
  10.125                  break;
  10.126                  case 0x81a0:
  10.127                  virge->streams.blend_ctrl = val;
  10.128 @@ -832,24 +863,36 @@
  10.129                  break;
  10.130                  case 0x81d0:
  10.131                  virge->streams.sec_fb0 = val;
  10.132 +                s3_virge_recalctimings(svga);
  10.133 +                svga->fullchange = changeframecount;
  10.134                  break;
  10.135                  case 0x81d4:
  10.136                  virge->streams.sec_fb1 = val;
  10.137 +                s3_virge_recalctimings(svga);
  10.138 +                svga->fullchange = changeframecount;
  10.139                  break;
  10.140                  case 0x81d8:
  10.141                  virge->streams.sec_stride = val;
  10.142 +                s3_virge_recalctimings(svga);
  10.143 +                svga->fullchange = changeframecount;
  10.144                  break;
  10.145                  case 0x81dc:
  10.146                  virge->streams.overlay_ctrl = val;
  10.147                  break;
  10.148                  case 0x81e0:
  10.149 -                virge->streams.k1_vert_scale = val;
  10.150 +                virge->streams.k1_vert_scale = val & 0x7ff;
  10.151 +                if (val & (1 << 10))
  10.152 +                        virge->streams.k1_vert_scale |= 0xfffff800;
  10.153                  break;
  10.154                  case 0x81e4:
  10.155 -                virge->streams.k2_vert_scale = val;
  10.156 +                virge->streams.k2_vert_scale = val & 0x7ff;
  10.157 +                if (val & (1 << 10))
  10.158 +                        virge->streams.k2_vert_scale |= 0xfffff800;
  10.159                  break;
  10.160                  case 0x81e8:
  10.161 -                virge->streams.dda_vert_accumulator = val;
  10.162 +                virge->streams.dda_vert_accumulator = val & 0xfff;
  10.163 +                if (val & (1 << 11))
  10.164 +                        virge->streams.dda_vert_accumulator |= 0xfffff800;
  10.165                  break;
  10.166                  case 0x81ec:
  10.167                  virge->streams.fifo_ctrl = val;
  10.168 @@ -872,11 +915,15 @@
  10.169                  virge->streams.sec_start = val;
  10.170                  virge->streams.sec_x = (val >> 16) & 0x7ff;
  10.171                  virge->streams.sec_y = val & 0x7ff;                
  10.172 +                s3_virge_recalctimings(svga);
  10.173 +                svga->fullchange = changeframecount;
  10.174                  break;
  10.175                  case 0x81fc:
  10.176                  virge->streams.sec_size = val;
  10.177                  virge->streams.sec_w = (val >> 16) & 0x7ff;
  10.178                  virge->streams.sec_h = val & 0x7ff;                
  10.179 +                s3_virge_recalctimings(svga);
  10.180 +                svga->fullchange = changeframecount;
  10.181                  break;
  10.182                  
  10.183                  case 0xa000: case 0xa004: case 0xa008: case 0xa00c:
  10.184 @@ -2547,6 +2594,279 @@
  10.185          }
  10.186  }
  10.187  
   10.188 +#define DECODE_YCbCr() /*Unpack 8 bytes at src (read as Y,Cr,Y,Cb) into 4 RGB samples*/ \
   10.189 +        do                                                              \
   10.190 +        {                                                               \
   10.191 +                int c;                                                  \
   10.192 +                /*Uses src, r[], g[], b[], x_write from the expansion site and CLAMP() defined elsewhere*/ \
   10.193 +                for (c = 0; c < 2; c++) /*each pass decodes one pixel pair*/ \
   10.194 +                {                                                       \
   10.195 +                        uint8_t y1, y2;                                 \
   10.196 +                        int8_t Cr, Cb;                                  \
   10.197 +                        int dR, dG, dB;                                 \
   10.198 +                                                                        \
   10.199 +                        y1 = src[0];                                    \
   10.200 +                        Cr = src[1] - 0x80; /*remove 0x80 bias -> signed chroma*/ \
   10.201 +                        y2 = src[2];                                    \
   10.202 +                        Cb = src[3] - 0x80;                             \
   10.203 +                        src += 4;                                       \
   10.204 +                        /*Chroma offsets in n/256 fixed point; both pixels of the pair share them*/ \
   10.205 +                        dR = (359*Cr) >> 8;                             \
   10.206 +                        dG = (88*Cb + 183*Cr) >> 8;                     \
   10.207 +                        dB = (453*Cb) >> 8;                             \
   10.208 +                        /*First pixel; CLAMP() presumably limits to 0..255 - confirm at its definition*/ \
   10.209 +                        r[x_write] = y1 + dR;                           \
   10.210 +                        CLAMP(r[x_write]);                              \
   10.211 +                        g[x_write] = y1 - dG;                           \
   10.212 +                        CLAMP(g[x_write]);                              \
   10.213 +                        b[x_write] = y1 + dB;                           \
   10.214 +                        CLAMP(b[x_write]);                              \
   10.215 +                        /*Second pixel*/                                \
   10.216 +                        r[x_write+1] = y2 + dR;                         \
   10.217 +                        CLAMP(r[x_write+1]);                            \
   10.218 +                        g[x_write+1] = y2 - dG;                         \
   10.219 +                        CLAMP(g[x_write+1]);                            \
   10.220 +                        b[x_write+1] = y2 + dB;                         \
   10.221 +                        CLAMP(b[x_write+1]);                            \
   10.222 +                                                                        \
   10.223 +                        x_write = (x_write + 2) & 7; /*advance, wrapping at 8 entries*/ \
   10.224 +                }                                                       \
   10.225 +        } while (0)
  10.226 +
  10.227 +/*Both YUV formats are untested*/
  10.228 +#define DECODE_YUV211()                                         \
  10.229 +        do                                                      \
  10.230 +        {                                                       \
  10.231 +                uint8_t y1, y2, y3, y4;                         \
  10.232 +                int8_t U, V;                                    \
  10.233 +                int dR, dG, dB;                                 \
  10.234 +                                                                \
  10.235 +                U = src[0] - 0x80;                              \
  10.236 +                y1 = (298 * (src[1] - 16)) >> 8;                \
  10.237 +                y2 = (298 * (src[2] - 16)) >> 8;                \
  10.238 +                V = src[3] - 0x80;                              \
  10.239 +                y3 = (298 * (src[4] - 16)) >> 8;                \
  10.240 +                y4 = (298 * (src[5] - 16)) >> 8;                \
  10.241 +                src += 6;                                       \
  10.242 +                                                                \
  10.243 +                dR = (309*V) >> 8;                              \
  10.244 +                dG = (100*U + 208*V) >> 8;                      \
  10.245 +                dB = (516*U) >> 8;                              \
  10.246 +                                                                \
  10.247 +                r[x_write] = y1 + dR;                           \
  10.248 +                CLAMP(r[x_write]);                              \
  10.249 +                g[x_write] = y1 - dG;                           \
  10.250 +                CLAMP(g[x_write]);                              \
  10.251 +                b[x_write] = y1 + dB;                           \
  10.252 +                CLAMP(b[x_write]);                              \
  10.253 +                                                                \
  10.254 +                r[x_write+1] = y2 + dR;                         \
  10.255 +                CLAMP(r[x_write+1]);                            \
  10.256 +                g[x_write+1] = y2 - dG;                         \
  10.257 +                CLAMP(g[x_write+1]);                            \
  10.258 +                b[x_write+1] = y2 + dB;                         \
  10.259 +                CLAMP(b[x_write+1]);                            \
  10.260 +                                                                \
  10.261 +                r[x_write+2] = y2 + dR;                         \
  10.262 +                CLAMP(r[x_write+2]);                            \
  10.263 +                g[x_write+2] = y2 - dG;                         \
  10.264 +                CLAMP(g[x_write+2]);                            \
  10.265 +                b[x_write+2] = y2 + dB;                         \
  10.266 +                CLAMP(b[x_write+2]);                            \
  10.267 +                                                                \
  10.268 +                r[x_write+3] = y2 + dR;                         \
  10.269 +                CLAMP(r[x_write+3]);                            \
  10.270 +                g[x_write+3] = y2 - dG;                         \
  10.271 +                CLAMP(g[x_write+3]);                            \
  10.272 +                b[x_write+3] = y2 + dB;                         \
  10.273 +                CLAMP(b[x_write+3]);                            \
  10.274 +                                                                \
  10.275 +                x_write = (x_write + 4) & 7;                    \
  10.276 +        } while (0)
  10.277 +
  10.278 +#define DECODE_YUV422()                                                 \
  10.279 +        do                                                              \
  10.280 +        {                                                               \
  10.281 +                int c;                                                  \
  10.282 +                                                                        \
  10.283 +                for (c = 0; c < 2; c++)                                 \
  10.284 +                {                                                       \
  10.285 +                        uint8_t y1, y2;                                 \
  10.286 +                        int8_t U, V;                                    \
  10.287 +                        int dR, dG, dB;                                 \
  10.288 +                                                                        \
  10.289 +                        U = src[0] - 0x80;                              \
  10.290 +                        y1 = (298 * (src[1] - 16)) >> 8;                \
  10.291 +                        V = src[2] - 0x80;                              \
  10.292 +                        y2 = (298 * (src[3] - 16)) >> 8;                \
  10.293 +                        src += 4;                                       \
  10.294 +                                                                        \
  10.295 +                        dR = (309*V) >> 8;                              \
  10.296 +                        dG = (100*U + 208*V) >> 8;                      \
  10.297 +                        dB = (516*U) >> 8;                              \
  10.298 +                                                                        \
  10.299 +                        r[x_write] = y1 + dR;                           \
  10.300 +                        CLAMP(r[x_write]);                              \
  10.301 +                        g[x_write] = y1 - dG;                           \
  10.302 +                        CLAMP(g[x_write]);                              \
  10.303 +                        b[x_write] = y1 + dB;                           \
  10.304 +                        CLAMP(b[x_write]);                              \
  10.305 +                                                                        \
  10.306 +                        r[x_write+1] = y2 + dR;                         \
  10.307 +                        CLAMP(r[x_write+1]);                            \
  10.308 +                        g[x_write+1] = y2 - dG;                         \
  10.309 +                        CLAMP(g[x_write+1]);                            \
  10.310 +                        b[x_write+1] = y2 + dB;                         \
  10.311 +                        CLAMP(b[x_write+1]);                            \
  10.312 +                                                                        \
  10.313 +                        x_write = (x_write + 2) & 7;                    \
  10.314 +                }                                                       \
  10.315 +        } while (0)
  10.316 +
   10.317 +#define DECODE_RGB555() /*Unpack four 16-bit 5:5:5 pixels at src into r[], g[], b[]*/ \
   10.318 +        do                                                              \
   10.319 +        {                                                               \
   10.320 +                int c;                                                  \
   10.321 +                /*Uses src, r[], g[], b[], x_write from the expansion site*/ \
   10.322 +                for (c = 0; c < 4; c++)                                 \
   10.323 +                {                                                       \
   10.324 +                        uint16_t dat;                                   \
   10.325 +                                                                        \
   10.326 +                        dat = *(uint16_t *)src; /*host-order 16-bit load through a cast byte pointer*/ \
   10.327 +                        src += 2;                                       \
   10.328 +                        /*Widen each 5-bit field to 8 bits by repeating its top 3 bits in the low bits*/ \
   10.329 +                        r[x_write + c] = ((dat & 0x001f) << 3) | ((dat & 0x001f) >> 2); /*bits 0-4*/ \
   10.330 +                        g[x_write + c] = ((dat & 0x03e0) >> 2) | ((dat & 0x03e0) >> 7); /*bits 5-9*/ \
   10.331 +                        b[x_write + c] = ((dat & 0x7c00) >> 7) | ((dat & 0x7c00) >> 12); /*bits 10-14; bit 15 ignored*/ \
   10.332 +                }                                                       \
   10.333 +                x_write = (x_write + 4) & 7; /*advance, wrapping at 8 entries*/ \
   10.334 +        } while (0)
  10.335 +
   10.336 +#define DECODE_RGB565() /*Unpack four 16-bit 5:6:5 pixels at src into r[], g[], b[]*/ \
   10.337 +        do                                                              \
   10.338 +        {                                                               \
   10.339 +                int c;                                                  \
   10.340 +                /*Uses src, r[], g[], b[], x_write from the expansion site*/ \
   10.341 +                for (c = 0; c < 4; c++)                                 \
   10.342 +                {                                                       \
   10.343 +                        uint16_t dat;                                   \
   10.344 +                                                                        \
   10.345 +                        dat = *(uint16_t *)src; /*host-order 16-bit load through a cast byte pointer*/ \
   10.346 +                        src += 2;                                       \
   10.347 +                        /*Widen each field to 8 bits by repeating its top bits in the low bits*/ \
   10.348 +                        r[x_write + c] = ((dat & 0x001f) << 3) | ((dat & 0x001f) >> 2); /*bits 0-4*/ \
   10.349 +                        g[x_write + c] = ((dat & 0x07e0) >> 3) | ((dat & 0x07e0) >> 9); /*bits 5-10 (6-bit field)*/ \
   10.350 +                        b[x_write + c] = ((dat & 0xf800) >> 8) | ((dat & 0xf800) >> 13); /*bits 11-15*/ \
   10.351 +                }                                                       \
   10.352 +                x_write = (x_write + 4) & 7; /*advance, wrapping at 8 entries*/ \
   10.353 +        } while (0)
  10.354 +
   10.355 +#define DECODE_RGB888() /*Copy four packed 3-byte pixels at src into r[], g[], b[]*/ \
   10.356 +        do                                                              \
   10.357 +        {                                                               \
   10.358 +                int c;                                                  \
   10.359 +                /*Uses src, r[], g[], b[], x_write from the expansion site*/ \
   10.360 +                for (c = 0; c < 4; c++)                                 \
   10.361 +                {                                                       \
   10.362 +                        r[x_write + c] = src[0]; /*byte 0 -> r[]*/      \
   10.363 +                        g[x_write + c] = src[1]; /*byte 1 -> g[]*/      \
   10.364 +                        b[x_write + c] = src[2]; /*byte 2 -> b[]*/      \
   10.365 +                        src += 3; /*3 bytes per pixel*/                 \
   10.366 +                }                                                       \
   10.367 +                x_write = (x_write + 4) & 7; /*advance, wrapping at 8 entries*/ \
   10.368 +        } while (0)
  10.369 +
   10.370 +#define DECODE_XRGB8888() /*Copy four 4-byte pixels at src into r[], g[], b[]; the 4th byte is skipped*/ \
   10.371 +        do                                                              \
   10.372 +        {                                                               \
   10.373 +                int c;                                                  \
   10.374 +                /*Uses src, r[], g[], b[], x_write from the expansion site*/ \
   10.375 +                for (c = 0; c < 4; c++)                                 \
   10.376 +                {                                                       \
   10.377 +                        r[x_write + c] = src[0]; /*byte 0 -> r[]*/      \
   10.378 +                        g[x_write + c] = src[1]; /*byte 1 -> g[]*/      \
   10.379 +                        b[x_write + c] = src[2]; /*byte 2 -> b[]*/      \
   10.380 +                        src += 4; /*4 bytes per pixel; src[3] is never read*/ \
   10.381 +                }                                                       \
   10.382 +                x_write = (x_write + 4) & 7; /*advance, wrapping at 8 entries*/ \
   10.383 +        } while (0)
  10.384 +
   10.385 +#define OVERLAY_SAMPLE() /*Run the decoder selected by virge->streams.sdif*/ \
   10.386 +        do                                      \
   10.387 +        {                                       \
   10.388 +                switch (virge->streams.sdif) /*sdif = bits 26:24 of the 0x8190 write*/ \
   10.389 +                {                               \
   10.390 +                        case 1:                 \
   10.391 +                        DECODE_YCbCr();         \
   10.392 +                        break;                  \
   10.393 +                        case 2:                 \
   10.394 +                        DECODE_YUV422();        \
   10.395 +                        break;                  \
   10.396 +                        case 3:                 \
   10.397 +                        DECODE_RGB555();        \
   10.398 +                        break;                  \
   10.399 +                        case 4:                 \
   10.400 +                        DECODE_YUV211();        \
   10.401 +                        break;                  \
   10.402 +                        case 5:                 \
   10.403 +                        DECODE_RGB565();        \
   10.404 +                        break;                  \
   10.405 +                        case 6:                 \
   10.406 +                        DECODE_RGB888();        \
   10.407 +                        break;                  \
   10.408 +                        case 7:                 \
   10.409 +                        default: /*value 0 also lands here*/ \
   10.410 +                        DECODE_XRGB8888();      \
   10.411 +                        break;                  \
   10.412 +                }                               \
   10.413 +        } while (0)
  10.414 +
  10.415 +static void s3_virge_overlay_draw(svga_t *svga, int displine) /* Draws one overlay scanline into buffer32->line[displine]; registered as the overlay_draw callback via svga_init(). */
  10.416 +{
  10.417 +        virge_t *virge = (virge_t *)svga->p;
  10.418 +        int offset = (virge->streams.sec_x - virge->streams.pri_x) + 1; /* overlay start column relative to pri_x */
  10.419 +        int h_acc = virge->streams.dda_horiz_accumulator; /* horizontal accumulator restarts from this value on every call */
  10.420 +        int r[8], g[8], b[8]; /* decoded pixels; OVERLAY_SAMPLE() fills four entries at a time */
  10.421 +        int r_samp[8] = {0, 0, 0, 0, 0, 0, 0, 0}; /* NOTE(review): *_samp are not used by any statement visible here - confirm whether other DECODE_* macros need them */
  10.422 +        int g_samp[8] = {0, 0, 0, 0, 0, 0, 0, 0};
  10.423 +        int b_samp[8] = {0, 0, 0, 0, 0, 0, 0, 0};
  10.424 +        int x_size, x_read = 4, x_write = 4; /* start at 4 so the first decode (XRGB8888 path) lands in entries 4-7, where reading begins */
  10.425 +        int x;
  10.426 +        uint32_t *p;
  10.427 +        uint8_t *src = &svga->vram[svga->overlay_latch.addr]; /* source row in VRAM; advanced by the DECODE_* macros */
  10.428 +        
  10.429 +        p = &((uint32_t *)buffer32->line[displine])[offset + 32]; /* presumably 32 is a left border in buffer32 - TODO confirm */
  10.430 +        
  10.431 +        if ((offset + virge->streams.sec_w) > virge->streams.pri_w) /* overlay would run past pri_w: use the reduced width */
  10.432 +                x_size = (virge->streams.pri_w - virge->streams.sec_x) + 1; /* NOTE(review): not guarded against a non-positive result */
  10.433 +        else
  10.434 +                x_size = virge->streams.sec_w + 1;
  10.435 +
  10.436 +        OVERLAY_SAMPLE(); /* prime the buffers before the first pixel is read */
  10.437 +        
  10.438 +        for (x = 0; x < x_size; x++)
  10.439 +        {
  10.440 +                *p++ = r[x_read] | (g[x_read] << 8) | (b[x_read] << 16); /* pack as r | g<<8 | b<<16 */
  10.441 +
  10.442 +                h_acc += virge->streams.k1_horiz_scale;
  10.443 +                if (h_acc >= 0) /* accumulator crossed zero: step to the next decoded pixel */
  10.444 +                {
  10.445 +                        if ((x_read ^ (x_read + 1)) & ~3) /* non-zero only when x_read is 3 or 7, i.e. leaving a group of four */
  10.446 +                                OVERLAY_SAMPLE(); /* refill the group about to be entered */
  10.447 +                        x_read = (x_read + 1) & 7;
  10.448 +
  10.449 +                        h_acc += (virge->streams.k2_horiz_scale - virge->streams.k1_horiz_scale); /* net change for this step is +k2 */
  10.450 +                }
  10.451 +        }
  10.452 +
  10.453 +        svga->overlay_latch.v_acc += virge->streams.k1_vert_scale; /* vertical stepping mirrors the horizontal scheme */
  10.454 +        if (svga->overlay_latch.v_acc >= 0)
  10.455 +        {
  10.456 +                svga->overlay_latch.v_acc += (virge->streams.k2_vert_scale - virge->streams.k1_vert_scale);
  10.457 +                svga->overlay_latch.addr += virge->streams.sec_stride; /* next source row; otherwise the same row is reused on the next call */
  10.458 +        }
  10.459 +}
  10.460 +
  10.461  static uint8_t s3_virge_pci_read(int func, int addr, void *p)
  10.462  {
  10.463          virge_t *virge = (virge_t *)p;
  10.464 @@ -2658,7 +2978,8 @@
  10.465          svga_init(&virge->svga, virge, 1 << 22, /*4mb*/
  10.466                     s3_virge_recalctimings,
  10.467                     s3_virge_in, s3_virge_out,
  10.468 -                   s3_virge_hwcursor_draw);
  10.469 +                   s3_virge_hwcursor_draw,
  10.470 +                   s3_virge_overlay_draw);
  10.471  
  10.472          rom_init(&virge->bios_rom, "roms/s3virge.bin", 0xc0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL);
  10.473          if (PCI)
  10.474 @@ -2728,7 +3049,8 @@
  10.475          svga_init(&virge->svga, virge, 1 << 22, /*4mb*/
  10.476                     s3_virge_recalctimings,
  10.477                     s3_virge_in, s3_virge_out,
  10.478 -                   s3_virge_hwcursor_draw);
  10.479 +                   s3_virge_hwcursor_draw,
  10.480 +                   s3_virge_overlay_draw);
  10.481  
  10.482          rom_init(&virge->bios_rom, "roms/86c375_1.bin", 0xc0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL);
  10.483          if (PCI)
    11.1 --- a/src/vid_svga.c	Mon Jun 09 18:13:57 2014 +0100
    11.2 +++ b/src/vid_svga.c	Fri Jun 13 18:43:25 2014 +0100
    11.3 @@ -400,7 +400,11 @@
    11.4          {
    11.5  //                if (!(vc & 15)) pclog("VC %i %i\n", vc, GetTickCount());
    11.6                  if (svga->displine == svga->hwcursor_latch.y && svga->hwcursor_latch.ena)
    11.7 -                   svga->hwcursor_on = 64 - svga->hwcursor_latch.yoff;
    11.8 +                        svga->hwcursor_on = 64 - svga->hwcursor_latch.yoff;
    11.9 +
   11.10 +                if (svga->displine == svga->overlay_latch.y && svga->overlay_latch.ena)
   11.11 +                        svga->overlay_on = svga->overlay_latch.ysize - svga->overlay_latch.yoff;
   11.12 +
   11.13                  svga->vidtime += svga->dispofftime;
   11.14  //                if (output) printf("Display off %f\n",vidtime);
   11.15                  svga->cgastat |= 1;
   11.16 @@ -414,11 +418,17 @@
   11.17                          if (svga->firstline == 2000) 
   11.18                                  svga->firstline = svga->displine;
   11.19                          
   11.20 -                        if (svga->hwcursor_on) 
   11.21 +                        if (svga->hwcursor_on || svga->overlay_on)
   11.22                                  svga->changedvram[svga->ma >> 12] = svga->changedvram[(svga->ma >> 12) + 1] = 2;
   11.23                          
   11.24                          svga->render(svga);
   11.25                          
   11.26 +                        if (svga->overlay_on)
   11.27 +                        {
   11.28 +                                svga->overlay_draw(svga, svga->displine);
   11.29 +                                svga->overlay_on--;
   11.30 +                        }
   11.31 +
   11.32                          if (svga->hwcursor_on)
   11.33                          {
   11.34                                  svga->hwcursor_draw(svga, svga->displine);
   11.35 @@ -594,6 +604,9 @@
   11.36                          
   11.37                          svga->hwcursor_on = 0;
   11.38                          svga->hwcursor_latch = svga->hwcursor;
   11.39 +
   11.40 +                        svga->overlay_on = 0;
   11.41 +                        svga->overlay_latch = svga->overlay;
   11.42  //                        pclog("Latch HWcursor addr %08X\n", svga_hwcursor_latch.addr);
   11.43                          
   11.44  //                        pclog("ADDR %08X\n",hwcursor_addr);
   11.45 @@ -609,7 +622,8 @@
   11.46                 void (*recalctimings_ex)(struct svga_t *svga),
   11.47                 uint8_t (*video_in) (uint16_t addr, void *p),
   11.48                 void    (*video_out)(uint16_t addr, uint8_t val, void *p),
   11.49 -               void (*hwcursor_draw)(struct svga_t *svga, int displine))
   11.50 +               void (*hwcursor_draw)(struct svga_t *svga, int displine),
   11.51 +               void (*overlay_draw)(struct svga_t *svga, int displine))
   11.52  {
   11.53          int c, d, e;
   11.54          
   11.55 @@ -639,6 +653,7 @@
   11.56          svga->video_in  = video_in;
   11.57          svga->video_out = video_out;
   11.58          svga->hwcursor_draw = hwcursor_draw;
   11.59 +        svga->overlay_draw = overlay_draw;
   11.60  //        _svga_recalctimings(svga);
   11.61  
   11.62          mem_mapping_add(&svga->mapping, 0xa0000, 0x20000, svga_read, svga_readw, svga_readl, svga_write, svga_writew, svga_writel, NULL, 0, svga);
    12.1 --- a/src/vid_svga.h	Mon Jun 09 18:13:57 2014 +0100
    12.2 +++ b/src/vid_svga.h	Fri Jun 13 18:43:25 2014 +0100
    12.3 @@ -82,10 +82,13 @@
    12.4                  int ena;
    12.5                  int x, y;
    12.6                  int xoff, yoff;
    12.7 +                int ysize;
    12.8                  uint32_t addr;
    12.9 -        } hwcursor, hwcursor_latch;
   12.10 +                int v_acc, h_acc;
   12.11 +        } hwcursor, hwcursor_latch, overlay, overlay_latch;
   12.12          
   12.13          int hwcursor_on;
   12.14 +        int overlay_on;
   12.15          
   12.16          void (*render)(struct svga_t *svga);
   12.17          void (*recalctimings_ex)(struct svga_t *svga);
   12.18 @@ -94,6 +97,8 @@
   12.19          uint8_t (*video_in) (uint16_t addr, void *p);
   12.20  
   12.21          void (*hwcursor_draw)(struct svga_t *svga, int displine);
   12.22 +
   12.23 +        void (*overlay_draw)(struct svga_t *svga, int displine);
   12.24          
   12.25          void *p;
   12.26  } svga_t;
   12.27 @@ -102,13 +107,11 @@
   12.28                 void (*recalctimings_ex)(struct svga_t *svga),
   12.29                 uint8_t (*video_in) (uint16_t addr, void *p),
   12.30                 void    (*video_out)(uint16_t addr, uint8_t val, void *p),
   12.31 -               void (*hwcursor_draw)(struct svga_t *svga, int displine));
   12.32 +               void (*hwcursor_draw)(struct svga_t *svga, int displine),
   12.33 +               void (*overlay_draw)(struct svga_t *svga, int displine));
   12.34  extern void svga_recalctimings(svga_t *svga);
   12.35  
   12.36  
   12.37 -extern int      svga_hwcursor_on;
   12.38 -extern void   (*svga_hwcursor_draw)(int displine);
   12.39 -
   12.40  uint8_t  svga_read(uint32_t addr, void *p);
   12.41  uint16_t svga_readw(uint32_t addr, void *p);
   12.42  uint32_t svga_readl(uint32_t addr, void *p);
    13.1 --- a/src/vid_tgui9440.c	Mon Jun 09 18:13:57 2014 +0100
    13.2 +++ b/src/vid_tgui9440.c	Fri Jun 13 18:43:25 2014 +0100
    13.3 @@ -475,7 +475,8 @@
    13.4          svga_init(&tgui->svga, tgui, 1 << 21, /*2mb*/
    13.5                     tgui_recalctimings,
    13.6                     tgui_in, tgui_out,
    13.7 -                   tgui_hwcursor_draw);
    13.8 +                   tgui_hwcursor_draw,
    13.9 +                   NULL);
   13.10  
   13.11          mem_mapping_add(&tgui->linear_mapping, 0,       0,      svga_read_linear, svga_readw_linear, svga_readl_linear, svga_write_linear, svga_writew_linear, svga_writel_linear, NULL, 0, &tgui->svga);
   13.12          mem_mapping_add(&tgui->accel_mapping,  0xbc000, 0x4000, tgui_accel_read,  tgui_accel_read_w, tgui_accel_read_l, tgui_accel_write,  tgui_accel_write_w, tgui_accel_write_l, NULL, 0,  tgui);
    14.1 --- a/src/vid_tvga.c	Mon Jun 09 18:13:57 2014 +0100
    14.2 +++ b/src/vid_tvga.c	Fri Jun 13 18:43:25 2014 +0100
    14.3 @@ -252,6 +252,7 @@
    14.4          svga_init(&tvga->svga, tvga, 1 << 20, /*1mb - chip supports 2mb, but drivers are buggy*/
    14.5                     tvga_recalctimings,
    14.6                     tvga_in, tvga_out,
    14.7 +                   NULL,
    14.8                     NULL);
    14.9         
   14.10          io_sethandler(0x03c0, 0x0020, tvga_in, NULL, NULL, tvga_out, NULL, NULL, tvga);
    15.1 --- a/src/vid_vga.c	Mon Jun 09 18:13:57 2014 +0100
    15.2 +++ b/src/vid_vga.c	Fri Jun 13 18:43:25 2014 +0100
    15.3 @@ -86,6 +86,7 @@
    15.4          svga_init(&vga->svga, vga, 1 << 18, /*256kb*/
    15.5                     NULL,
    15.6                     vga_in, vga_out,
    15.7 +                   NULL,
    15.8                     NULL);
    15.9  
   15.10          io_sethandler(0x03c0, 0x0020, vga_in, NULL, NULL, vga_out, NULL, NULL, vga);