PCem
view src/vid_s3_virge.c @ 168:2d3db7fa48ab
Fixed frequency control on ViRGE/DX
| author | TomW |
|---|---|
| date | Sat Oct 04 17:51:51 2014 +0100 |
| parents | a0a99c64d528 |
| children |
line source
1 /*S3 ViRGE emulation*/
2 #include <stdlib.h>
3 #include "ibm.h"
4 #include "device.h"
5 #include "io.h"
6 #include "mem.h"
7 #include "pci.h"
8 #include "rom.h"
9 #include "thread.h"
10 #include "video.h"
11 #include "vid_s3_virge.h"
12 #include "vid_svga.h"
13 #include "vid_svga_render.h"
/*Profiling accumulators. Nothing in the visible part of this file updates
  the two time counters - presumably used by the render code; TODO confirm.*/
static uint64_t virge_time = 0;
static uint64_t status_time = 0;

/*Running totals of MMIO accesses, bumped by the s3_virge_mmio_* handlers*/
static int reg_writes = 0;
static int reg_reads = 0;
/*4x4 dither table holding values 0-7.
  NOTE(review): presumably indexed [y & 3][x & 3] by the rasteriser - confirm.*/
static int dither[4][4] =
{
        {0, 4, 1, 5},
        {6, 2, 7, 3},
        {1, 5, 0, 4},
        {7, 3, 6, 2},
};
/*Capacity of the s3d_t ring buffer (virge_t.s3d_buffer), and the matching
  index mask. RB_MASK is only a valid wrap mask while RB_SIZE is a power of two.*/
#define RB_SIZE    256
#define RB_MASK    (RB_SIZE - 1)

/*Ring occupancy helpers. These expand to expressions on a variable named
  'virge' (a virge_t pointer) that must be in scope at the point of use.
  The read/write indices are free-running; only their difference is used here.*/
#define RB_ENTRIES (virge->s3d_write_idx - virge->s3d_read_idx)
#define RB_FULL    (RB_ENTRIES == RB_SIZE)
#define RB_EMPTY   (RB_ENTRIES == 0)
/*Register state describing one 3D triangle command. virge_t holds a working
  copy (s3d_tri) plus a ring of RB_SIZE of these (s3d_buffer).
  NOTE(review): apart from the registers whose MMIO offsets are given, the
  per-field meanings below are inferred from the field names - confirm
  against the ViRGE documentation.*/
typedef struct s3d_t
{
        uint32_t cmd_set;       /*Command set word (MMIO 0xb500)*/
        int clip_l;             /*Clip rectangle (MMIO 0xb4dc / 0xb4e0)*/
        int clip_r;
        int clip_t;
        int clip_b;

        uint32_t dest_base;     /*MMIO 0xb4d8*/
        uint32_t dest_str;      /*MMIO 0xb4e4, upper half*/

        uint32_t z_base;        /*MMIO 0xb4d4*/
        uint32_t z_str;         /*MMIO 0xb4e8*/

        uint32_t tex_base;      /*MMIO 0xb4ec*/
        uint32_t tex_bdr_clr;   /*MMIO 0xb4f0*/
        uint32_t tbv;           /*MMIO 0xb504*/
        uint32_t tbu;           /*MMIO 0xb508*/

        /*Presumably texture V/U gradients and start values*/
        int32_t TdVdX;
        int32_t TdUdX;
        int32_t TdVdY;
        int32_t TdUdY;
        uint32_t tus;
        uint32_t tvs;

        /*Presumably Z gradients and start value*/
        int32_t TdZdX;
        int32_t TdZdY;
        uint32_t tzs;

        /*Presumably W gradients (TdWdX at MMIO 0xb50c, TdWdY at 0xb510) and start value*/
        int32_t TdWdX;
        int32_t TdWdY;
        uint32_t tws;

        /*Presumably D gradients and start value*/
        int32_t TdDdX;
        int32_t TdDdY;
        uint32_t tds;

        /*Presumably per-channel colour gradients and start values*/
        int16_t TdGdX;
        int16_t TdBdX;
        int16_t TdRdX;
        int16_t TdAdX;
        int16_t TdGdY;
        int16_t TdBdY;
        int16_t TdRdY;
        int16_t TdAdY;
        uint32_t tgs;
        uint32_t tbs;
        uint32_t trs;
        uint32_t tas;

        /*Presumably edge slopes / end points and start coordinates*/
        uint32_t TdXdY12;
        uint32_t txend12;
        uint32_t TdXdY01;
        uint32_t txend01;
        uint32_t TdXdY02;
        uint32_t txs;
        uint32_t tys;
        int ty01;
        int ty12;
        int tlr;
} s3d_t;
75 typedef struct virge_t
76 {
77 mem_mapping_t linear_mapping;
78 mem_mapping_t mmio_mapping;
79 mem_mapping_t new_mmio_mapping;
81 rom_t bios_rom;
83 svga_t svga;
85 uint8_t bank;
86 uint8_t ma_ext;
87 int width;
88 int bpp;
90 uint8_t virge_id, virge_id_high, virge_id_low, virge_rev;
92 uint32_t linear_base, linear_size;
94 uint8_t pci_regs[256];
96 int is_375;
98 int bilinear_enabled;
99 int dithering_enabled;
100 int memory_size;
102 int pixel_count, tri_count;
104 thread_t *render_thread;
105 event_t *wake_render_thread;
106 event_t *wake_main_thread;
107 event_t *not_full_event;
109 uint32_t hwcursor_col[2];
110 int hwcursor_col_pos;
112 struct
113 {
114 uint32_t src_base;
115 uint32_t dest_base;
116 int clip_l, clip_r, clip_t, clip_b;
117 int dest_str, src_str;
118 uint32_t mono_pat_0;
119 uint32_t mono_pat_1;
120 uint32_t pat_bg_clr;
121 uint32_t pat_fg_clr;
122 uint32_t src_bg_clr;
123 uint32_t src_fg_clr;
124 uint32_t cmd_set;
125 int r_width, r_height;
126 int rsrc_x, rsrc_y;
127 int rdest_x, rdest_y;
129 int lxend0, lxend1;
130 int32_t ldx;
131 uint32_t lxstart, lystart;
132 int lycnt;
133 int line_dir;
135 int src_x, src_y;
136 int dest_x, dest_y;
137 int w, h;
138 uint8_t rop;
140 int data_left_count;
141 uint32_t data_left;
143 uint32_t pattern_8[8*8];
144 uint32_t pattern_16[8*8];
145 uint32_t pattern_32[8*8];
146 } s3d;
148 s3d_t s3d_tri;
150 s3d_t s3d_buffer[RB_SIZE];
151 int s3d_read_idx, s3d_write_idx;
152 int s3d_busy;
154 struct
155 {
156 uint32_t pri_ctrl;
157 uint32_t chroma_ctrl;
158 uint32_t sec_ctrl;
159 uint32_t chroma_upper_bound;
160 uint32_t sec_filter;
161 uint32_t blend_ctrl;
162 uint32_t pri_fb0, pri_fb1;
163 uint32_t pri_stride;
164 uint32_t buffer_ctrl;
165 uint32_t sec_fb0, sec_fb1;
166 uint32_t sec_stride;
167 uint32_t overlay_ctrl;
168 int32_t k1_vert_scale;
169 int32_t k2_vert_scale;
170 int32_t dda_vert_accumulator;
171 int32_t k1_horiz_scale;
172 int32_t k2_horiz_scale;
173 int32_t dda_horiz_accumulator;
174 uint32_t fifo_ctrl;
175 uint32_t pri_start;
176 uint32_t pri_size;
177 uint32_t sec_start;
178 uint32_t sec_size;
180 int sdif;
182 int pri_x, pri_y, pri_w, pri_h;
183 int sec_x, sec_y, sec_w, sec_h;
184 } streams;
185 } virge_t;
187 static void queue_triangle(virge_t *virge);
189 static void s3_virge_recalctimings(svga_t *svga);
190 static void s3_virge_updatemapping(virge_t *virge);
192 static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat);
194 static uint8_t s3_virge_mmio_read(uint32_t addr, void *p);
195 static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *p);
196 static uint32_t s3_virge_mmio_read_l(uint32_t addr, void *p);
197 static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *p);
198 static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *p);
199 static void s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *p);
/*Bit fields of the command set register (s3d.cmd_set / s3d_tri.cmd_set).
  Each value is given as a hex mask with its bit position alongside.
  Note that CMD_SET_ZB_MODE and CMD_SET_XP both cover bit 25.*/
enum
{
        CMD_SET_AE           = 0x00000001,      /*bit 0 - tested by the MMIO write handlers to decide when a command starts*/
        CMD_SET_HC           = 0x00000002,      /*bit 1*/

        CMD_SET_FORMAT_MASK  = 0x0000001c,      /*bits 4-2*/
        CMD_SET_FORMAT_8     = 0x00000000,
        CMD_SET_FORMAT_16    = 0x00000004,
        CMD_SET_FORMAT_24    = 0x00000008,

        CMD_SET_MS           = 0x00000040,      /*bit 6 - when set, word/dword CPU data is byte-swapped before reaching the blitter*/
        CMD_SET_IDS          = 0x00000080,      /*bit 7*/
        CMD_SET_MP           = 0x00000100,      /*bit 8*/
        CMD_SET_TP           = 0x00000200,      /*bit 9*/

        CMD_SET_ITA_MASK     = 0x00000c00,      /*bits 11-10*/
        CMD_SET_ITA_BYTE     = 0x00000000,
        CMD_SET_ITA_WORD     = 0x00000400,
        CMD_SET_ITA_DWORD    = 0x00000800,

        CMD_SET_ZUP          = 0x00800000,      /*bit 23*/

        CMD_SET_ZB_MODE      = 0x03000000,      /*bits 25-24*/

        CMD_SET_XP           = 0x02000000,      /*bit 25*/
        CMD_SET_YP           = 0x04000000,      /*bit 26*/

        CMD_SET_COMMAND_MASK = 0x78000000       /*bits 30-27*/
};
/*Further command set bits. CMD_SET_TWE shares bit 26 with CMD_SET_YP.
  NOTE(review): presumably these apply to 3D commands only - confirm.*/
#define CMD_SET_ABC_SRC    0x00040000   /*bit 18*/
#define CMD_SET_ABC_ENABLE 0x00080000   /*bit 19*/
#define CMD_SET_TWE        0x04000000   /*bit 26*/
/*Values of the 4-bit command field selected by CMD_SET_COMMAND_MASK (bits 30-27)*/
enum
{
        CMD_SET_COMMAND_BITBLT   = 0x00000000,  /*field value 0*/
        CMD_SET_COMMAND_RECTFILL = 0x10000000,  /*field value 2*/
        CMD_SET_COMMAND_LINE     = 0x18000000,  /*field value 3*/
        CMD_SET_COMMAND_NOP      = 0x78000000   /*field value 15*/
};
243 static void s3_virge_out(uint16_t addr, uint8_t val, void *p)
244 {
245 virge_t *virge = (virge_t *)p;
246 svga_t *svga = &virge->svga;
247 uint8_t old;
249 if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1))
250 addr ^= 0x60;
252 // pclog("S3 out %04X %02X %04X:%08X %04X %04X %i\n", addr, val, CS, pc, ES, BX, ins);
254 switch (addr)
255 {
256 case 0x3c5:
257 if (svga->seqaddr >= 0x10)
258 {
259 svga->seqregs[svga->seqaddr & 0x1f]=val;
260 s3_virge_recalctimings(svga);
261 return;
262 }
263 if (svga->seqaddr == 4) /*Chain-4 - update banking*/
264 {
265 if (val & 8) svga->write_bank = svga->read_bank = virge->bank << 16;
266 else svga->write_bank = svga->read_bank = virge->bank << 14;
267 }
268 break;
270 //case 0x3C6: case 0x3C7: case 0x3C8: case 0x3C9:
271 // pclog("Write RAMDAC %04X %02X %04X:%04X\n", addr, val, CS, pc);
272 //sdac_ramdac_out(addr,val);
273 //return;
275 case 0x3d4:
276 svga->crtcreg = val;// & 0x7f;
277 return;
278 case 0x3d5:
279 //pclog("Write CRTC R%02X %02X %04x(%08x):%08x\n", svga->crtcreg, val, CS, cs, pc);
280 if (svga->crtcreg <= 7 && svga->crtc[0x11] & 0x80)
281 return;
282 if (svga->crtcreg >= 0x20 && svga->crtcreg != 0x38 && (svga->crtc[0x38] & 0xcc) != 0x48)
283 return;
284 if (svga->crtcreg >= 0x80)
285 return;
286 old = svga->crtc[svga->crtcreg];
287 svga->crtc[svga->crtcreg] = val;
288 switch (svga->crtcreg)
289 {
290 case 0x31:
291 virge->ma_ext = (virge->ma_ext & 0x1c) | ((val & 0x30) >> 4);
292 svga->vrammask = (val & 8) ? 0x3fffff : 0x3ffff;
293 break;
295 case 0x50:
296 switch (svga->crtc[0x50] & 0xc1)
297 {
298 case 0x00: virge->width = (svga->crtc[0x31] & 2) ? 2048 : 1024; break;
299 case 0x01: virge->width = 1152; break;
300 case 0x40: virge->width = 640; break;
301 case 0x80: virge->width = 800; break;
302 case 0x81: virge->width = 1600; break;
303 case 0xc0: virge->width = 1280; break;
304 }
305 virge->bpp = (svga->crtc[0x50] >> 4) & 3;
306 break;
307 case 0x69:
308 virge->ma_ext = val & 0x1f;
309 break;
311 case 0x35:
312 virge->bank = (virge->bank & 0x70) | (val & 0xf);
313 // pclog("CRTC write R35 %02X\n", val);
314 if (svga->chain4) svga->write_bank = svga->read_bank = virge->bank << 16;
315 else svga->write_bank = svga->read_bank = virge->bank << 14;
316 break;
317 case 0x51:
318 virge->bank = (virge->bank & 0x4f) | ((val & 0xc) << 2);
319 if (svga->chain4) svga->write_bank = svga->read_bank = virge->bank << 16;
320 else svga->write_bank = svga->read_bank = virge->bank << 14;
321 virge->ma_ext = (virge->ma_ext & ~0xc) | ((val & 3) << 2);
322 break;
323 case 0x6a:
324 virge->bank = val;
325 // pclog("CRTC write R6a %02X\n", val);
326 if (svga->chain4) svga->write_bank = svga->read_bank = virge->bank << 16;
327 else svga->write_bank = svga->read_bank = virge->bank << 14;
328 break;
330 case 0x3a:
331 if (val & 0x10) svga->gdcreg[5] |= 0x40; /*Horrible cheat*/
332 break;
334 case 0x45:
335 svga->hwcursor.ena = val & 1;
336 break;
337 case 0x46: case 0x47: case 0x48: case 0x49:
338 case 0x4c: case 0x4d: case 0x4e: case 0x4f:
339 svga->hwcursor.x = ((svga->crtc[0x46] << 8) | svga->crtc[0x47]) & 0x7ff;
340 svga->hwcursor.y = ((svga->crtc[0x48] << 8) | svga->crtc[0x49]) & 0x7ff;
341 svga->hwcursor.xoff = svga->crtc[0x4e] & 63;
342 svga->hwcursor.yoff = svga->crtc[0x4f] & 63;
343 svga->hwcursor.addr = ((((svga->crtc[0x4c] << 8) | svga->crtc[0x4d]) & 0xfff) * 1024) + (svga->hwcursor.yoff * 16);
344 break;
346 case 0x4a:
347 virge->hwcursor_col[1] = (virge->hwcursor_col[1] & ~(0xff << (virge->hwcursor_col_pos * 8))) |
348 (val << (virge->hwcursor_col_pos * 8));
349 virge->hwcursor_col_pos++;
350 virge->hwcursor_col_pos &= 3;
351 break;
352 case 0x4b:
353 virge->hwcursor_col[0] = (virge->hwcursor_col[0] & ~(0xff << (virge->hwcursor_col_pos * 8))) |
354 (val << (virge->hwcursor_col_pos * 8));
355 virge->hwcursor_col_pos++;
356 virge->hwcursor_col_pos &= 3;
357 break;
359 case 0x53:
360 case 0x58: case 0x59: case 0x5a:
361 s3_virge_updatemapping(virge);
362 break;
364 case 0x67:
365 switch (val >> 4)
366 {
367 case 3: svga->bpp = 15; break;
368 case 5: svga->bpp = 16; break;
369 case 7: svga->bpp = 24; break;
370 case 13: svga->bpp = 32; break;
371 default: svga->bpp = 8; break;
372 }
373 break;
374 //case 0x55: case 0x43:
375 // pclog("Write CRTC R%02X %02X\n", crtcreg, val);
376 }
377 if (old != val)
378 {
379 if (svga->crtcreg < 0xe || svga->crtcreg > 0x10)
380 {
381 svga->fullchange = changeframecount;
382 svga_recalctimings(svga);
383 }
384 }
385 break;
386 }
387 svga_out(addr, val, svga);
388 }
390 static uint8_t s3_virge_in(uint16_t addr, void *p)
391 {
392 virge_t *virge = (virge_t *)p;
393 svga_t *svga = &virge->svga;
394 uint8_t ret;
396 if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1))
397 addr ^= 0x60;
399 // if (addr != 0x3da) pclog("S3 in %04X %04X:%08X ", addr, CS, pc);
400 switch (addr)
401 {
402 case 0x3c1:
403 if (svga->attraddr > 0x14)
404 ret = 0xff;
405 else
406 ret = svga_in(addr, svga);
407 break;
408 //case 0x3C6: case 0x3C7: case 0x3C8: case 0x3C9:
409 // pclog("Read RAMDAC %04X %04X:%04X\n", addr, CS, pc);
410 //return sdac_ramdac_in(addr);
412 case 0x3c5:
413 if (svga->seqaddr >= 8)
414 ret = svga->seqregs[svga->seqaddr & 0x1f];
415 else if (svga->seqaddr <= 4)
416 ret = svga_in(addr, svga);
417 else
418 ret = 0xff;
419 break;
421 case 0x3D4:
422 ret = svga->crtcreg;
423 break;
424 case 0x3D5:
425 //pclog("Read CRTC R%02X %04X:%04X (%02x)\n", svga->crtcreg, CS, pc, svga->crtc[svga->crtcreg]);
426 switch (svga->crtcreg)
427 {
428 case 0x2d: ret = virge->virge_id_high; break; /*Extended chip ID*/
429 case 0x2e: ret = virge->virge_id_low; break; /*New chip ID*/
430 case 0x2f: ret = virge->virge_rev; break;
431 case 0x30: ret = virge->virge_id; break; /*Chip ID*/
432 case 0x31: ret = (svga->crtc[0x31] & 0xcf) | ((virge->ma_ext & 3) << 4); break;
433 case 0x35: ret = (svga->crtc[0x35] & 0xf0) | (virge->bank & 0xf); break;
434 case 0x36: ret = (svga->crtc[0x36] & 0xfc) | 2; break; /*PCI bus*/
435 case 0x45: virge->hwcursor_col_pos = 0; ret = svga->crtc[0x45]; break;
436 case 0x51: ret = (svga->crtc[0x51] & 0xf0) | ((virge->bank >> 2) & 0xc) | ((virge->ma_ext >> 2) & 3); break;
437 case 0x69: ret = virge->ma_ext; break;
438 case 0x6a: ret = virge->bank; break;
439 default: ret = svga->crtc[svga->crtcreg]; break;
440 }
441 break;
443 default:
444 ret = svga_in(addr, svga);
445 break;
446 }
447 // if (addr != 0x3da) pclog("%02X\n", ret);
448 return ret;
449 }
451 static void s3_virge_recalctimings(svga_t *svga)
452 {
453 virge_t *virge = (virge_t *)svga->p;
455 if (svga->crtc[0x5d] & 0x01) svga->htotal += 0x100;
456 if (svga->crtc[0x5d] & 0x02) svga->hdisp += 0x100;
457 if (svga->crtc[0x5e] & 0x01) svga->vtotal += 0x400;
458 if (svga->crtc[0x5e] & 0x02) svga->dispend += 0x400;
459 if (svga->crtc[0x5e] & 0x04) svga->vblankstart += 0x400;
460 if (svga->crtc[0x5e] & 0x10) svga->vsyncstart += 0x400;
461 if (svga->crtc[0x5e] & 0x40) svga->split += 0x400;
462 svga->interlace = svga->crtc[0x42] & 0x20;
464 if ((svga->crtc[0x67] & 0xc) != 0xc) /*VGA mode*/
465 {
466 svga->ma_latch |= (virge->ma_ext << 16);
467 //pclog("VGA mode\n");
468 if (svga->crtc[0x51] & 0x30) svga->rowoffset += (svga->crtc[0x51] & 0x30) << 4;
469 else if (svga->crtc[0x43] & 0x04) svga->rowoffset += 0x100;
470 if (!svga->rowoffset) svga->rowoffset = 256;
472 if ((svga->gdcreg[5] & 0x40) && (svga->crtc[0x3a] & 0x10))
473 {
474 switch (svga->bpp)
475 {
476 case 8:
477 svga->render = svga_render_8bpp_highres;
478 break;
479 case 15:
480 svga->render = svga_render_15bpp_highres;
481 break;
482 case 16:
483 svga->render = svga_render_16bpp_highres;
484 break;
485 case 24:
486 svga->render = svga_render_24bpp_highres;
487 break;
488 case 32:
489 svga->render = svga_render_32bpp_highres;
490 break;
491 }
492 }
494 // pclog("svga->rowoffset = %i bpp=%i\n", svga->rowoffset, svga->bpp);
495 if (svga->bpp == 15 || svga->bpp == 16)
496 {
497 svga->htotal >>= 1;
498 svga->hdisp >>= 1;
499 }
500 if (svga->bpp == 24)
501 {
502 svga->rowoffset = (svga->rowoffset * 3) / 4; /*Hack*/
503 }
504 //pclog("VGA mode x_disp=%i dispend=%i vtotal=%i\n", svga->hdisp, svga->dispend, svga->vtotal);
505 }
506 else /*Streams mode*/
507 {
508 if (virge->streams.buffer_ctrl & 1)
509 svga->ma_latch = virge->streams.pri_fb1 >> 2;
510 else
511 svga->ma_latch = virge->streams.pri_fb0 >> 2;
513 svga->hdisp = virge->streams.pri_w + 1;
514 svga->dispend = virge->streams.pri_h;
516 svga->overlay.x = virge->streams.sec_x - virge->streams.pri_x;
517 svga->overlay.y = virge->streams.sec_y - virge->streams.pri_y;
518 svga->overlay.ysize = virge->streams.sec_h;
520 if (virge->streams.buffer_ctrl & 2)
521 svga->overlay.addr = virge->streams.sec_fb1;
522 else
523 svga->overlay.addr = virge->streams.sec_fb0;
525 svga->overlay.ena = (svga->overlay.x >= 0);
526 svga->overlay.v_acc = virge->streams.dda_vert_accumulator;
527 //pclog("Streams mode x_disp=%i dispend=%i vtotal=%i x=%i y=%i ysize=%i\n", svga->hdisp, svga->dispend, svga->vtotal, svga->overlay.x, svga->overlay.y, svga->overlay.ysize);
528 svga->rowoffset = virge->streams.pri_stride >> 3;
530 switch ((virge->streams.pri_ctrl >> 24) & 0x7)
531 {
532 case 0: /*RGB-8 (CLUT)*/
533 svga->render = svga_render_8bpp_highres;
534 break;
535 case 3: /*KRGB-16 (1.5.5.5)*/
536 svga->htotal >>= 1;
537 svga->render = svga_render_15bpp_highres;
538 break;
539 case 5: /*RGB-16 (5.6.5)*/
540 svga->htotal >>= 1;
541 svga->render = svga_render_16bpp_highres;
542 break;
543 case 6: /*RGB-24 (8.8.8)*/
544 svga->render = svga_render_24bpp_highres;
545 break;
546 case 7: /*XRGB-32 (X.8.8.8)*/
547 svga->render = svga_render_32bpp_highres;
548 break;
549 }
550 }
552 if (((svga->miscout >> 2) & 3) == 3)
553 {
554 int n = svga->seqregs[0x12] & 0x1f;
555 int r = (svga->seqregs[0x12] >> 5) & (virge->is_375 ? 7 : 3);
556 int m = svga->seqregs[0x13] & 0x7f;
557 double freq = (((double)m + 2) / (((double)n + 2) * (double)(1 << r))) * 14318184.0;
559 svga->clock = cpuclock / freq;
560 }
561 }
563 static void s3_virge_updatemapping(virge_t *virge)
564 {
565 svga_t *svga = &virge->svga;
567 if (!(virge->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_MEM))
568 {
569 // pclog("Update mapping - PCI disabled\n");
570 mem_mapping_disable(&svga->mapping);
571 mem_mapping_disable(&virge->linear_mapping);
572 mem_mapping_disable(&virge->mmio_mapping);
573 mem_mapping_disable(&virge->new_mmio_mapping);
574 return;
575 }
577 pclog("Update mapping - bank %02X ", svga->gdcreg[6] & 0xc);
578 switch (svga->gdcreg[6] & 0xc) /*Banked framebuffer*/
579 {
580 case 0x0: /*128k at A0000*/
581 mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x20000);
582 svga->banked_mask = 0xffff;
583 break;
584 case 0x4: /*64k at A0000*/
585 mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x10000);
586 svga->banked_mask = 0xffff;
587 break;
588 case 0x8: /*32k at B0000*/
589 mem_mapping_set_addr(&svga->mapping, 0xb0000, 0x08000);
590 svga->banked_mask = 0x7fff;
591 break;
592 case 0xC: /*32k at B8000*/
593 mem_mapping_set_addr(&svga->mapping, 0xb8000, 0x08000);
594 svga->banked_mask = 0x7fff;
595 break;
596 }
598 virge->linear_base = (svga->crtc[0x5a] << 16) | (svga->crtc[0x59] << 24);
600 pclog("Linear framebuffer %02X ", svga->crtc[0x58] & 0x10);
601 if (svga->crtc[0x58] & 0x10) /*Linear framebuffer*/
602 {
603 switch (svga->crtc[0x58] & 3)
604 {
605 case 0: /*64k*/
606 virge->linear_size = 0x10000;
607 break;
608 case 1: /*1mb*/
609 virge->linear_size = 0x100000;
610 break;
611 case 2: /*2mb*/
612 virge->linear_size = 0x200000;
613 break;
614 case 3: /*8mb*/
615 virge->linear_size = 0x400000;
616 break;
617 }
618 virge->linear_base &= ~(virge->linear_size - 1);
619 // pclog("%08X %08X %02X %02X %02X\n", linear_base, linear_size, crtc[0x58], crtc[0x59], crtc[0x5a]);
620 pclog("Linear framebuffer at %08X size %08X\n", virge->linear_base, virge->linear_size);
621 if (virge->linear_base == 0xa0000)
622 {
623 mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x10000);
624 mem_mapping_disable(&virge->linear_mapping);
625 }
626 else
627 mem_mapping_set_addr(&virge->linear_mapping, virge->linear_base, virge->linear_size);
628 }
629 else
630 mem_mapping_disable(&virge->linear_mapping);
632 pclog("Memory mapped IO %02X\n", svga->crtc[0x53] & 0x18);
633 if (svga->crtc[0x53] & 0x10) /*Old MMIO*/
634 {
635 if (svga->crtc[0x53] & 0x20)
636 mem_mapping_set_addr(&virge->mmio_mapping, 0xb8000, 0x8000);
637 else
638 mem_mapping_set_addr(&virge->mmio_mapping, 0xa0000, 0x10000);
639 }
640 else
641 mem_mapping_disable(&virge->mmio_mapping);
643 if (svga->crtc[0x53] & 0x08) /*New MMIO*/
644 mem_mapping_set_addr(&virge->new_mmio_mapping, virge->linear_base + 0x1000000, 0x10000);
645 else
646 mem_mapping_disable(&virge->new_mmio_mapping);
648 }
651 static uint8_t s3_virge_mmio_read(uint32_t addr, void *p)
652 {
653 reg_reads++;
654 // pclog("New MMIO readb %08X\n", addr);
655 switch (addr & 0xffff)
656 {
657 case 0x83b0: case 0x83b1: case 0x83b2: case 0x83b3:
658 case 0x83b4: case 0x83b5: case 0x83b6: case 0x83b7:
659 case 0x83b8: case 0x83b9: case 0x83ba: case 0x83bb:
660 case 0x83bc: case 0x83bd: case 0x83be: case 0x83bf:
661 case 0x83c0: case 0x83c1: case 0x83c2: case 0x83c3:
662 case 0x83c4: case 0x83c5: case 0x83c6: case 0x83c7:
663 case 0x83c8: case 0x83c9: case 0x83ca: case 0x83cb:
664 case 0x83cc: case 0x83cd: case 0x83ce: case 0x83cf:
665 case 0x83d0: case 0x83d1: case 0x83d2: case 0x83d3:
666 case 0x83d4: case 0x83d5: case 0x83d6: case 0x83d7:
667 case 0x83d8: case 0x83d9: case 0x83da: case 0x83db:
668 case 0x83dc: case 0x83dd: case 0x83de: case 0x83df:
669 return s3_virge_in(addr & 0x3ff, p);
670 }
671 return 0xff;
672 }
673 static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *p)
674 {
675 reg_reads++;
676 // pclog("New MMIO readw %08X\n", addr);
677 switch (addr & 0xfffe)
678 {
679 default:
680 return s3_virge_mmio_read(addr, p) | (s3_virge_mmio_read(addr + 1, p) << 8);
681 }
682 return 0xffff;
683 }
684 static uint32_t s3_virge_mmio_read_l(uint32_t addr, void *p)
685 {
686 virge_t *virge = (virge_t *)p;
687 uint32_t ret = 0xffffffff;
688 reg_reads++;
689 // pclog("New MMIO readl %08X %04X(%08X):%08X ", addr, CS, cs, pc);
690 switch (addr & 0xfffc)
691 {
692 case 0x8180:
693 ret = virge->streams.pri_ctrl;
694 break;
695 case 0x8184:
696 ret = virge->streams.chroma_ctrl;
697 break;
698 case 0x8190:
699 ret = virge->streams.sec_ctrl;
700 break;
701 case 0x8194:
702 ret = virge->streams.chroma_upper_bound;
703 break;
704 case 0x8198:
705 ret = virge->streams.sec_filter;
706 break;
707 case 0x81a0:
708 ret = virge->streams.blend_ctrl;
709 break;
710 case 0x81c0:
711 ret = virge->streams.pri_fb0;
712 break;
713 case 0x81c4:
714 ret = virge->streams.pri_fb1;
715 break;
716 case 0x81c8:
717 ret = virge->streams.pri_stride;
718 break;
719 case 0x81cc:
720 ret = virge->streams.buffer_ctrl;
721 break;
722 case 0x81d0:
723 ret = virge->streams.sec_fb0;
724 break;
725 case 0x81d4:
726 ret = virge->streams.sec_fb1;
727 break;
728 case 0x81d8:
729 ret = virge->streams.sec_stride;
730 break;
731 case 0x81dc:
732 ret = virge->streams.overlay_ctrl;
733 break;
734 case 0x81e0:
735 ret = virge->streams.k1_vert_scale;
736 break;
737 case 0x81e4:
738 ret = virge->streams.k2_vert_scale;
739 break;
740 case 0x81e8:
741 ret = virge->streams.dda_vert_accumulator;
742 break;
743 case 0x81ec:
744 ret = virge->streams.fifo_ctrl;
745 break;
746 case 0x81f0:
747 ret = virge->streams.pri_start;
748 break;
749 case 0x81f4:
750 ret = virge->streams.pri_size;
751 break;
752 case 0x81f8:
753 ret = virge->streams.sec_start;
754 break;
755 case 0x81fc:
756 ret = virge->streams.sec_size;
757 break;
759 case 0x8504:
760 if (virge->s3d_busy)
761 ret = (0x10 << 8);
762 else
763 ret = (0x10 << 8) | (1 << 13);
764 // pclog("Read status %04x %i\n", ret, virge->s3d_busy);
765 break;
766 case 0xa4d4:
767 ret = virge->s3d.src_base;
768 break;
769 case 0xa4d8:
770 ret = virge->s3d.dest_base;
771 break;
772 case 0xa4dc:
773 ret = (virge->s3d.clip_l << 16) | virge->s3d.clip_r;
774 break;
775 case 0xa4e0:
776 ret = (virge->s3d.clip_t << 16) | virge->s3d.clip_b;
777 break;
778 case 0xa4e4:
779 ret = (virge->s3d.dest_str << 16) | virge->s3d.src_str;
780 break;
781 case 0xa4e8:
782 ret = virge->s3d.mono_pat_0;
783 break;
784 case 0xa4ec:
785 ret = virge->s3d.mono_pat_1;
786 break;
787 case 0xa4f0:
788 ret = virge->s3d.pat_bg_clr;
789 break;
790 case 0xa4f4:
791 ret = virge->s3d.pat_fg_clr;
792 break;
793 case 0xa4f8:
794 ret = virge->s3d.src_bg_clr;
795 break;
796 case 0xa4fc:
797 ret = virge->s3d.src_fg_clr;
798 break;
799 case 0xa500:
800 ret = virge->s3d.cmd_set;
801 break;
802 case 0xa504:
803 ret = (virge->s3d.r_width << 16) | virge->s3d.r_height;
804 break;
805 case 0xa508:
806 ret = (virge->s3d.rsrc_x << 16) | virge->s3d.rsrc_y;
807 break;
808 case 0xa50c:
809 ret = (virge->s3d.rdest_x << 16) | virge->s3d.rdest_y;
810 break;
812 default:
813 ret = s3_virge_mmio_read_w(addr, p) | (s3_virge_mmio_read_w(addr + 2, p) << 16);
814 }
815 // /*if ((addr & 0xfffc) != 0x8504) */pclog("%02x\n", ret);
816 return ret;
817 }
818 static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *p)
819 {
820 virge_t *virge = (virge_t *)p;
821 svga_t *svga = &virge->svga;
823 // pclog("New MMIO writeb %08X %02X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
824 reg_writes++;
825 if ((addr & 0xfffc) < 0x8000)
826 s3_virge_bitblt(virge, 8, val);
827 else switch (addr & 0xffff)
828 {
829 case 0x83b0: case 0x83b1: case 0x83b2: case 0x83b3:
830 case 0x83b4: case 0x83b5: case 0x83b6: case 0x83b7:
831 case 0x83b8: case 0x83b9: case 0x83ba: case 0x83bb:
832 case 0x83bc: case 0x83bd: case 0x83be: case 0x83bf:
833 case 0x83c0: case 0x83c1: case 0x83c2: case 0x83c3:
834 case 0x83c4: case 0x83c5: case 0x83c6: case 0x83c7:
835 case 0x83c8: case 0x83c9: case 0x83ca: case 0x83cb:
836 case 0x83cc: case 0x83cd: case 0x83ce: case 0x83cf:
837 case 0x83d0: case 0x83d1: case 0x83d2: case 0x83d3:
838 case 0x83d4: case 0x83d5: case 0x83d6: case 0x83d7:
839 case 0x83d8: case 0x83d9: case 0x83da: case 0x83db:
840 case 0x83dc: case 0x83dd: case 0x83de: case 0x83df:
841 s3_virge_out(addr & 0x3ff, val, p);
842 break;
843 }
846 }
847 static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *p)
848 {
849 virge_t *virge = (virge_t *)p;
850 reg_writes++;
851 // pclog("New MMIO writew %08X %04X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
852 if ((addr & 0xfffc) < 0x8000)
853 {
854 if (virge->s3d.cmd_set & CMD_SET_MS)
855 s3_virge_bitblt(virge, 16, ((val >> 8) | (val << 8)) << 16);
856 else
857 s3_virge_bitblt(virge, 16, val);
858 }
859 else switch (addr & 0xfffe)
860 {
861 case 0x83d4:
862 s3_virge_mmio_write(addr, val, p);
863 s3_virge_mmio_write(addr + 1, val >> 8, p);
864 break;
865 }
866 }
867 static void s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *p)
868 {
869 virge_t *virge = (virge_t *)p;
870 svga_t *svga = &virge->svga;
871 reg_writes++;
872 // if ((addr & 0xfffc) >= 0xb400 && (addr & 0xfffc) < 0xb800)
873 // pclog("New MMIO writel %08X %08X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
875 if ((addr & 0xfffc) < 0x8000)
876 {
877 if (virge->s3d.cmd_set & CMD_SET_MS)
878 s3_virge_bitblt(virge, 32, ((val & 0xff000000) >> 24) | ((val & 0x00ff0000) >> 8) | ((val & 0x0000ff00) << 8) | ((val & 0x000000ff) << 24));
879 else
880 s3_virge_bitblt(virge, 32, val);
881 }
882 else switch (addr & 0xfffc)
883 {
884 case 0x8180:
885 virge->streams.pri_ctrl = val;
886 s3_virge_recalctimings(svga);
887 svga->fullchange = changeframecount;
888 break;
889 case 0x8184:
890 virge->streams.chroma_ctrl = val;
891 break;
892 case 0x8190:
893 virge->streams.sec_ctrl = val;
894 virge->streams.dda_horiz_accumulator = val & 0xfff;
895 if (val & (1 << 11))
896 virge->streams.dda_horiz_accumulator |= 0xfffff800;
897 virge->streams.sdif = (val >> 24) & 7;
898 break;
899 case 0x8194:
900 virge->streams.chroma_upper_bound = val;
901 break;
902 case 0x8198:
903 virge->streams.sec_filter = val;
904 virge->streams.k1_horiz_scale = val & 0x7ff;
905 if (val & (1 << 10))
906 virge->streams.k1_horiz_scale |= 0xfffff800;
907 virge->streams.k2_horiz_scale = (val >> 16) & 0x7ff;
908 if ((val >> 16) & (1 << 10))
909 virge->streams.k2_horiz_scale |= 0xfffff800;
910 break;
911 case 0x81a0:
912 virge->streams.blend_ctrl = val;
913 break;
914 case 0x81c0:
915 // pclog("Write pri_fb0 %08x\n", val);
916 virge->streams.pri_fb0 = val & 0x3fffff;
917 s3_virge_recalctimings(svga);
918 svga->fullchange = changeframecount;
919 break;
920 case 0x81c4:
921 // pclog("Write pri_fb1 %08x\n", val);
922 virge->streams.pri_fb1 = val & 0x3fffff;
923 s3_virge_recalctimings(svga);
924 svga->fullchange = changeframecount;
925 break;
926 case 0x81c8:
927 virge->streams.pri_stride = val & 0xfff;
928 s3_virge_recalctimings(svga);
929 svga->fullchange = changeframecount;
930 break;
931 case 0x81cc:
932 // pclog("Write buffer_ctrl %08x\n", val);
933 virge->streams.buffer_ctrl = val;
934 s3_virge_recalctimings(svga);
935 svga->fullchange = changeframecount;
936 break;
937 case 0x81d0:
938 virge->streams.sec_fb0 = val;
939 s3_virge_recalctimings(svga);
940 svga->fullchange = changeframecount;
941 break;
942 case 0x81d4:
943 virge->streams.sec_fb1 = val;
944 s3_virge_recalctimings(svga);
945 svga->fullchange = changeframecount;
946 break;
947 case 0x81d8:
948 virge->streams.sec_stride = val;
949 s3_virge_recalctimings(svga);
950 svga->fullchange = changeframecount;
951 break;
952 case 0x81dc:
953 virge->streams.overlay_ctrl = val;
954 break;
955 case 0x81e0:
956 virge->streams.k1_vert_scale = val & 0x7ff;
957 if (val & (1 << 10))
958 virge->streams.k1_vert_scale |= 0xfffff800;
959 break;
960 case 0x81e4:
961 virge->streams.k2_vert_scale = val & 0x7ff;
962 if (val & (1 << 10))
963 virge->streams.k2_vert_scale |= 0xfffff800;
964 break;
965 case 0x81e8:
966 virge->streams.dda_vert_accumulator = val & 0xfff;
967 if (val & (1 << 11))
968 virge->streams.dda_vert_accumulator |= 0xfffff800;
969 break;
970 case 0x81ec:
971 virge->streams.fifo_ctrl = val;
972 break;
973 case 0x81f0:
974 virge->streams.pri_start = val;
975 virge->streams.pri_x = (val >> 16) & 0x7ff;
976 virge->streams.pri_y = val & 0x7ff;
977 s3_virge_recalctimings(svga);
978 svga->fullchange = changeframecount;
979 break;
980 case 0x81f4:
981 virge->streams.pri_size = val;
982 virge->streams.pri_w = (val >> 16) & 0x7ff;
983 virge->streams.pri_h = val & 0x7ff;
984 s3_virge_recalctimings(svga);
985 svga->fullchange = changeframecount;
986 break;
987 case 0x81f8:
988 virge->streams.sec_start = val;
989 virge->streams.sec_x = (val >> 16) & 0x7ff;
990 virge->streams.sec_y = val & 0x7ff;
991 s3_virge_recalctimings(svga);
992 svga->fullchange = changeframecount;
993 break;
994 case 0x81fc:
995 virge->streams.sec_size = val;
996 virge->streams.sec_w = (val >> 16) & 0x7ff;
997 virge->streams.sec_h = val & 0x7ff;
998 s3_virge_recalctimings(svga);
999 svga->fullchange = changeframecount;
1000 break;
1002 case 0xa000: case 0xa004: case 0xa008: case 0xa00c:
1003 case 0xa010: case 0xa014: case 0xa018: case 0xa01c:
1004 case 0xa020: case 0xa024: case 0xa028: case 0xa02c:
1005 case 0xa030: case 0xa034: case 0xa038: case 0xa03c:
1006 case 0xa040: case 0xa044: case 0xa048: case 0xa04c:
1007 case 0xa050: case 0xa054: case 0xa058: case 0xa05c:
1008 case 0xa060: case 0xa064: case 0xa068: case 0xa06c:
1009 case 0xa070: case 0xa074: case 0xa078: case 0xa07c:
1010 case 0xa080: case 0xa084: case 0xa088: case 0xa08c:
1011 case 0xa090: case 0xa094: case 0xa098: case 0xa09c:
1012 case 0xa0a0: case 0xa0a4: case 0xa0a8: case 0xa0ac:
1013 case 0xa0b0: case 0xa0b4: case 0xa0b8: case 0xa0bc:
1014 case 0xa0c0: case 0xa0c4: case 0xa0c8: case 0xa0cc:
1015 case 0xa0d0: case 0xa0d4: case 0xa0d8: case 0xa0dc:
1016 case 0xa0e0: case 0xa0e4: case 0xa0e8: case 0xa0ec:
1017 case 0xa0f0: case 0xa0f4: case 0xa0f8: case 0xa0fc:
1018 case 0xa100: case 0xa104: case 0xa108: case 0xa10c:
1019 case 0xa110: case 0xa114: case 0xa118: case 0xa11c:
1020 case 0xa120: case 0xa124: case 0xa128: case 0xa12c:
1021 case 0xa130: case 0xa134: case 0xa138: case 0xa13c:
1022 case 0xa140: case 0xa144: case 0xa148: case 0xa14c:
1023 case 0xa150: case 0xa154: case 0xa158: case 0xa15c:
1024 case 0xa160: case 0xa164: case 0xa168: case 0xa16c:
1025 case 0xa170: case 0xa174: case 0xa178: case 0xa17c:
1026 case 0xa180: case 0xa184: case 0xa188: case 0xa18c:
1027 case 0xa190: case 0xa194: case 0xa198: case 0xa19c:
1028 case 0xa1a0: case 0xa1a4: case 0xa1a8: case 0xa1ac:
1029 case 0xa1b0: case 0xa1b4: case 0xa1b8: case 0xa1bc:
1030 case 0xa1c0: case 0xa1c4: case 0xa1c8: case 0xa1cc:
1031 case 0xa1d0: case 0xa1d4: case 0xa1d8: case 0xa1dc:
1032 case 0xa1e0: case 0xa1e4: case 0xa1e8: case 0xa1ec:
1033 case 0xa1f0: case 0xa1f4: case 0xa1f8: case 0xa1fc:
1034 {
1035 int x = addr & 4;
1036 int y = (addr >> 3) & 7;
1037 virge->s3d.pattern_8[y*8 + x] = val & 0xff;
1038 virge->s3d.pattern_8[y*8 + x + 1] = val >> 8;
1039 virge->s3d.pattern_8[y*8 + x + 2] = val >> 16;
1040 virge->s3d.pattern_8[y*8 + x + 3] = val >> 24;
1042 x = (addr >> 1) & 6;
1043 y = (addr >> 4) & 7;
1044 virge->s3d.pattern_16[y*8 + x] = val & 0xffff;
1045 virge->s3d.pattern_16[y*8 + x + 1] = val >> 16;
1047 x = (addr >> 2) & 7;
1048 y = (addr >> 5) & 7;
1049 virge->s3d.pattern_32[y*8 + x] = val & 0xffffff;
1050 }
1051 break;
1053 case 0xa4d4: case 0xa8d4:
1054 virge->s3d.src_base = val & 0x3ffff8;
1055 break;
1056 case 0xa4d8: case 0xa8d8:
1057 virge->s3d.dest_base = val & 0x3ffff8;
1058 break;
1059 case 0xa4dc: case 0xa8dc:
1060 virge->s3d.clip_l = (val >> 16) & 0x7ff;
1061 virge->s3d.clip_r = val & 0x7ff;
1062 break;
1063 case 0xa4e0: case 0xa8e0:
1064 virge->s3d.clip_t = (val >> 16) & 0x7ff;
1065 virge->s3d.clip_b = val & 0x7ff;
1066 break;
1067 case 0xa4e4: case 0xa8e4:
1068 virge->s3d.dest_str = (val >> 16) & 0xff8;
1069 virge->s3d.src_str = val & 0xff8;
1070 break;
1071 case 0xa4e8:
1072 virge->s3d.mono_pat_0 = val;
1073 break;
1074 case 0xa4ec:
1075 virge->s3d.mono_pat_1 = val;
1076 break;
1077 case 0xa4f0:
1078 virge->s3d.pat_bg_clr = val;
1079 break;
1080 case 0xa4f4: case 0xa8f4:
1081 virge->s3d.pat_fg_clr = val;
1082 break;
1083 case 0xa4f8:
1084 virge->s3d.src_bg_clr = val;
1085 break;
1086 case 0xa4fc:
1087 virge->s3d.src_fg_clr = val;
1088 break;
1089 case 0xa500: case 0xa900:
1090 virge->s3d.cmd_set = val;
1091 if (!(val & CMD_SET_AE))
1092 s3_virge_bitblt(virge, -1, 0);
1093 break;
1094 case 0xa504:
1095 virge->s3d.r_width = (val >> 16) & 0x7ff;
1096 virge->s3d.r_height = val & 0x7ff;
1097 break;
1098 case 0xa508:
1099 virge->s3d.rsrc_x = (val >> 16) & 0x7ff;
1100 virge->s3d.rsrc_y = val & 0x7ff;
1101 break;
1102 case 0xa50c:
1103 virge->s3d.rdest_x = (val >> 16) & 0x7ff;
1104 virge->s3d.rdest_y = val & 0x7ff;
1105 if (virge->s3d.cmd_set & CMD_SET_AE)
1106 s3_virge_bitblt(virge, -1, 0);
1107 break;
1108 case 0xa96c:
1109 virge->s3d.lxend0 = (val >> 16) & 0x7ff;
1110 virge->s3d.lxend1 = val & 0x7ff;
1111 break;
1112 case 0xa970:
1113 virge->s3d.ldx = (int32_t)val;
1114 break;
1115 case 0xa974:
1116 virge->s3d.lxstart = val;
1117 break;
1118 case 0xa978:
1119 virge->s3d.lystart = val & 0x7ff;
1120 break;
1121 case 0xa97c:
1122 virge->s3d.lycnt = val & 0x7ff;
1123 virge->s3d.line_dir = val >> 31;
1124 if (virge->s3d.cmd_set & CMD_SET_AE)
1125 s3_virge_bitblt(virge, -1, 0);
1126 break;
1128 case 0xb4d4:
1129 virge->s3d_tri.z_base = val & 0x3ffff8;
1130 break;
1131 case 0xb4d8:
1132 virge->s3d_tri.dest_base = val & 0x3ffff8;
1133 break;
1134 case 0xb4dc:
1135 virge->s3d_tri.clip_l = (val >> 16) & 0x7ff;
1136 virge->s3d_tri.clip_r = val & 0x7ff;
1137 break;
1138 case 0xb4e0:
1139 virge->s3d_tri.clip_t = (val >> 16) & 0x7ff;
1140 virge->s3d_tri.clip_b = val & 0x7ff;
1141 break;
1142 case 0xb4e4:
1143 virge->s3d_tri.dest_str = (val >> 16) & 0xff8;
1144 virge->s3d.src_str = val & 0xff8;
1145 break;
1146 case 0xb4e8:
1147 virge->s3d_tri.z_str = val & 0xff8;
1148 break;
1149 case 0xb4ec:
1150 virge->s3d_tri.tex_base = val & 0x3ffff8;
1151 break;
1152 case 0xb4f0:
1153 virge->s3d_tri.tex_bdr_clr = val & 0xffffff;
1154 break;
1155 case 0xb500:
1156 virge->s3d_tri.cmd_set = val;
1157 if (!(val & CMD_SET_AE))
1158 queue_triangle(virge);
1159 /* {
1160 thread_set_event(virge->wake_render_thread);
1161 thread_wait_event(virge->wake_main_thread, -1);
1162 } */
1163 // s3_virge_triangle(virge);
1164 break;
1165 case 0xb504:
1166 virge->s3d_tri.tbv = val & 0xfffff;
1167 break;
1168 case 0xb508:
1169 virge->s3d_tri.tbu = val & 0xfffff;
1170 break;
1171 case 0xb50c:
1172 virge->s3d_tri.TdWdX = val;
1173 break;
1174 case 0xb510:
1175 virge->s3d_tri.TdWdY = val;
1176 break;
1177 case 0xb514:
1178 virge->s3d_tri.tws = val;
1179 break;
1180 case 0xb518:
1181 virge->s3d_tri.TdDdX = val;
1182 break;
1183 case 0xb51c:
1184 virge->s3d_tri.TdVdX = val;
1185 break;
1186 case 0xb520:
1187 virge->s3d_tri.TdUdX = val;
1188 break;
1189 case 0xb524:
1190 virge->s3d_tri.TdDdY = val;
1191 break;
1192 case 0xb528:
1193 virge->s3d_tri.TdVdY = val;
1194 break;
1195 case 0xb52c:
1196 virge->s3d_tri.TdUdY = val;
1197 break;
1198 case 0xb530:
1199 virge->s3d_tri.tds = val;
1200 break;
1201 case 0xb534:
1202 virge->s3d_tri.tvs = val;
1203 break;
1204 case 0xb538:
1205 virge->s3d_tri.tus = val;
1206 break;
1207 case 0xb53c:
1208 virge->s3d_tri.TdGdX = val >> 16;
1209 virge->s3d_tri.TdBdX = val & 0xffff;
1210 break;
1211 case 0xb540:
1212 virge->s3d_tri.TdAdX = val >> 16;
1213 virge->s3d_tri.TdRdX = val & 0xffff;
1214 break;
1215 case 0xb544:
1216 virge->s3d_tri.TdGdY = val >> 16;
1217 virge->s3d_tri.TdBdY = val & 0xffff;
1218 break;
1219 case 0xb548:
1220 virge->s3d_tri.TdAdY = val >> 16;
1221 virge->s3d_tri.TdRdY = val & 0xffff;
1222 break;
1223 case 0xb54c:
1224 virge->s3d_tri.tgs = (val >> 16) & 0xffff;
1225 virge->s3d_tri.tbs = val & 0xffff;
1226 break;
1227 case 0xb550:
1228 virge->s3d_tri.tas = (val >> 16) & 0xffff;
1229 virge->s3d_tri.trs = val & 0xffff;
1230 break;
1232 case 0xb554:
1233 virge->s3d_tri.TdZdX = val;
1234 break;
1235 case 0xb558:
1236 virge->s3d_tri.TdZdY = val;
1237 break;
1238 case 0xb55c:
1239 virge->s3d_tri.tzs = val;
1240 break;
1241 case 0xb560:
1242 virge->s3d_tri.TdXdY12 = val;
1243 break;
1244 case 0xb564:
1245 virge->s3d_tri.txend12 = val;
1246 break;
1247 case 0xb568:
1248 virge->s3d_tri.TdXdY01 = val;
1249 break;
1250 case 0xb56c:
1251 virge->s3d_tri.txend01 = val;
1252 break;
1253 case 0xb570:
1254 virge->s3d_tri.TdXdY02 = val;
1255 break;
1256 case 0xb574:
1257 virge->s3d_tri.txs = val;
1258 break;
1259 case 0xb578:
1260 virge->s3d_tri.tys = val;
1261 break;
1262 case 0xb57c:
1263 virge->s3d_tri.ty01 = (val >> 16) & 0x7ff;
1264 virge->s3d_tri.ty12 = val & 0x7ff;
1265 virge->s3d_tri.tlr = val >> 31;
1266 if (virge->s3d_tri.cmd_set & CMD_SET_AE)
1267 queue_triangle(virge);
1268 /* {
1269 thread_set_event(virge->wake_render_thread);
1270 thread_wait_event(virge->wake_main_thread, -1);
1271 }*/
1273 // s3_virge_triangle(virge);
1274 break;
1275 }
1276 }
/*
 * READ(addr, val): fetch one pixel from video memory into val.
 *
 * Relies on two names from the expansion site: `vram` (byte pointer to video
 * memory) and `bpp` (0 = 8 bpp, 1 = 16 bpp, 2 = 24 bpp).  The address is
 * wrapped to the 4MB window with 0x3fffff.  For any other bpp value val is
 * left untouched.
 *
 * Both arguments are parenthesised so that expression arguments such as
 * `base | offset` are masked as a whole.
 */
#define READ(addr, val)                                                         \
        do                                                                      \
        {                                                                       \
                switch (bpp)                                                    \
                {                                                               \
                        case 0: /*8 bpp*/                                       \
                        (val) = vram[(addr) & 0x3fffff];                        \
                        break;                                                  \
                        case 1: /*16 bpp*/                                      \
                        (val) = *(uint16_t *)&vram[(addr) & 0x3fffff];          \
                        break;                                                  \
                        case 2: /*24 bpp - read 32 bits, keep the low 24*/      \
                        (val) = (*(uint32_t *)&vram[(addr) & 0x3fffff]) & 0xffffff; \
                        break;                                                  \
                }                                                               \
        } while (0)
/*
 * Z_READ(addr): the 16-bit Z-buffer entry at addr (wrapped to the 4MB window).
 * Uses `vram` from the expansion site.  The expansion is fully parenthesised
 * so it can be used safely inside larger expressions.
 */
#define Z_READ(addr) (*(uint16_t *)&vram[(addr) & 0x3fffff])

/*
 * Z_WRITE(addr, val): store a 16-bit Z value unless CMD_SET_ZB_MODE is set in
 * s3d_tri->cmd_set.  Uses `vram` and `s3d_tri` from the expansion site.
 * Wrapped in do/while(0) so that the internal `if` cannot capture an `else`
 * that follows the macro invocation.
 */
#define Z_WRITE(addr, val)                                                      \
        do                                                                      \
        {                                                                       \
                if (!(s3d_tri->cmd_set & CMD_SET_ZB_MODE))                      \
                        *(uint16_t *)&vram[(addr) & 0x3fffff] = (val);          \
        } while (0)
/*
 * CLIP(x, y): 2D engine clip test.
 *
 * When CMD_SET_HC is set in virge->s3d.cmd_set and (x, y) lies outside the
 * inclusive rectangle clip_l..clip_r / clip_t..clip_b, clears the `update`
 * variable of the expansion site.  `update` is never set back to 1 here.
 * Arguments are parenthesised so expression arguments compare as a whole.
 */
#define CLIP(x, y)                                              \
        do                                                      \
        {                                                       \
                if ((virge->s3d.cmd_set & CMD_SET_HC) &&        \
                    ((x) < virge->s3d.clip_l ||                 \
                     (x) > virge->s3d.clip_r ||                 \
                     (y) < virge->s3d.clip_t ||                 \
                     (y) > virge->s3d.clip_b))                  \
                        update = 0;                             \
        } while (0)
/*
 * CLIP_3D(x, y): clip test against the triangle engine's clip rectangle.
 *
 * Same test as the 2D one, but reads the state through `s3d_tri` from the
 * expansion site: when CMD_SET_HC is set in s3d_tri->cmd_set and (x, y) is
 * outside clip_l..clip_r / clip_t..clip_b (inclusive), `update` is cleared.
 * Arguments are parenthesised so expression arguments compare as a whole.
 */
#define CLIP_3D(x, y)                                           \
        do                                                      \
        {                                                       \
                if ((s3d_tri->cmd_set & CMD_SET_HC) &&          \
                    ((x) < s3d_tri->clip_l ||                   \
                     (x) > s3d_tri->clip_r ||                   \
                     (y) < s3d_tri->clip_t ||                   \
                     (y) > s3d_tri->clip_b))                    \
                        update = 0;                             \
        } while (0)
/*
 * Z_CLIP(Zzb, Zs): Z-buffer compare.
 *
 * Zzb is the value currently in the Z buffer (must be an lvalue), Zs the
 * value of the incoming pixel.  Skipped entirely when CMD_SET_ZB_MODE is set
 * in s3d_tri->cmd_set.  Otherwise bits 20-22 of cmd_set select the compare:
 *
 *   0  never pass          4  pass if Zs <  Zzb
 *   1  pass if Zs >  Zzb   5  pass if Zs != Zzb
 *   2  pass if Zs == Zzb   6  pass if Zs <= Zzb
 *   3  pass if Zs >= Zzb   7  always pass (also forces update = 1)
 *
 * On a pass Zzb is overwritten with Zs; on a fail `update` (from the
 * expansion site) is cleared.  Arguments are parenthesised so expression
 * arguments compare as a whole.
 */
#define Z_CLIP(Zzb, Zs)                                                                 \
        do                                                                              \
        {                                                                               \
                if (!(s3d_tri->cmd_set & CMD_SET_ZB_MODE))                              \
                        switch ((s3d_tri->cmd_set >> 20) & 7)                           \
                        {                                                               \
                                case 0: update = 0; break;                              \
                                case 1: if ((Zs) <= (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 2: if ((Zs) != (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 3: if ((Zs) <  (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 4: if ((Zs) >= (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 5: if ((Zs) == (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 6: if ((Zs) >  (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 7: update = 1; (Zzb) = (Zs); break;                \
                        }                                                               \
        } while (0)
/*
 * MIX(): apply the 8-bit raster operation in virge->s3d.rop.
 *
 * Works on the names `dest`, `source`, `pattern` and `out` of the expansion
 * site.  For each of the low 24 bit positions the three input bits form an
 * index (dest = bit 0, source = bit 1, pattern = bit 2); if that bit of the
 * ROP code is set, the matching bit is ORed into `out`.  `out` is only ever
 * ORed into, so the caller must start it at 0.
 */
#define MIX()                                                                   \
        do                                                                      \
        {                                                                       \
                int mix_bit;                                                    \
                for (mix_bit = 0; mix_bit < 24; mix_bit++)                      \
                {                                                               \
                        int rop_idx = 0;                                        \
                        if ((dest >> mix_bit) & 1)                              \
                                rop_idx |= 1;                                   \
                        if ((source >> mix_bit) & 1)                            \
                                rop_idx |= 2;                                   \
                        if ((pattern >> mix_bit) & 1)                           \
                                rop_idx |= 4;                                   \
                        if ((virge->s3d.rop >> rop_idx) & 1)                    \
                                out |= (1 << mix_bit);                          \
                }                                                               \
        } while (0)
/*
 * WRITE(addr, val): store one pixel into video memory and mark the 4KB page
 * containing its first byte as changed.
 *
 * Relies on `vram`, `bpp` (0 = 8 bpp, 1 = 16 bpp, 2 = 24 bpp), `virge` and
 * `changeframecount` from the expansion site.  The address is wrapped to the
 * 4MB window with 0x3fffff.
 *
 * The 24 bpp case is done as a 32-bit store, so the byte following the pixel
 * is read back and merged into bits 24-31 to leave it unchanged.  That byte
 * is widened to uint32_t before the shift: shifting the promoted int left by
 * 24 would overflow into the sign bit for values >= 0x80.
 */
#define WRITE(addr, val)                                                                        \
        do                                                                                      \
        {                                                                                       \
                switch (bpp)                                                                    \
                {                                                                               \
                        case 0: /*8 bpp*/                                                       \
                        vram[(addr) & 0x3fffff] = (val);                                        \
                        virge->svga.changedvram[((addr) & 0x3fffff) >> 12] = changeframecount;  \
                        break;                                                                  \
                        case 1: /*16 bpp*/                                                      \
                        *(uint16_t *)&vram[(addr) & 0x3fffff] = (val);                          \
                        virge->svga.changedvram[((addr) & 0x3fffff) >> 12] = changeframecount;  \
                        break;                                                                  \
                        case 2: /*24 bpp*/                                                      \
                        *(uint32_t *)&vram[(addr) & 0x3fffff] = ((val) & 0xffffff) |            \
                                        ((uint32_t)vram[((addr) + 3) & 0x3fffff] << 24);        \
                        virge->svga.changedvram[((addr) & 0x3fffff) >> 12] = changeframecount;  \
                        break;                                                                  \
                }                                                                               \
        } while (0)
1372 static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat)
1373 {
1374 int cpu_input = (count != -1);
1375 uint8_t *vram = virge->svga.vram;
1376 uint32_t mono_pattern[64];
1377 int count_mask;
1378 int x_inc = (virge->s3d.cmd_set & CMD_SET_XP) ? 1 : -1;
1379 int y_inc = (virge->s3d.cmd_set & CMD_SET_YP) ? 1 : -1;
1380 int bpp;
1381 int x_mul;
1382 int cpu_dat_shift;
1383 uint32_t *pattern_data;
1385 switch (virge->s3d.cmd_set & CMD_SET_FORMAT_MASK)
1386 {
1387 case CMD_SET_FORMAT_8:
1388 bpp = 0;
1389 x_mul = 1;
1390 cpu_dat_shift = 8;
1391 pattern_data = virge->s3d.pattern_8;
1392 break;
1393 case CMD_SET_FORMAT_16:
1394 bpp = 1;
1395 x_mul = 2;
1396 cpu_dat_shift = 16;
1397 pattern_data = virge->s3d.pattern_16;
1398 break;
1399 case CMD_SET_FORMAT_24:
1400 default:
1401 bpp = 2;
1402 x_mul = 3;
1403 cpu_dat_shift = 24;
1404 pattern_data = virge->s3d.pattern_32;
1405 break;
1406 }
1407 if (virge->s3d.cmd_set & CMD_SET_MP)
1408 pattern_data = mono_pattern;
1410 switch (virge->s3d.cmd_set & CMD_SET_ITA_MASK)
1411 {
1412 case CMD_SET_ITA_BYTE:
1413 count_mask = ~0x7;
1414 break;
1415 case CMD_SET_ITA_WORD:
1416 count_mask = ~0xf;
1417 break;
1418 case CMD_SET_ITA_DWORD:
1419 default:
1420 count_mask = ~0x1f;
1421 break;
1422 }
1423 if (virge->s3d.cmd_set & CMD_SET_MP)
1424 {
1425 int x, y;
1426 for (y = 0; y < 4; y++)
1427 {
1428 for (x = 0; x < 8; x++)
1429 {
1430 if (virge->s3d.mono_pat_0 & (1 << (x + y*8)))
1431 mono_pattern[y*8 + x] = virge->s3d.pat_fg_clr;
1432 else
1433 mono_pattern[y*8 + x] = virge->s3d.pat_bg_clr;
1434 if (virge->s3d.mono_pat_1 & (1 << (x + y*8)))
1435 mono_pattern[(y+4)*8 + x] = virge->s3d.pat_fg_clr;
1436 else
1437 mono_pattern[(y+4)*8 + x] = virge->s3d.pat_bg_clr;
1438 }
1439 }
1440 }
1441 switch (virge->s3d.cmd_set & CMD_SET_COMMAND_MASK)
1442 {
1443 case CMD_SET_COMMAND_NOP:
1444 break;
1446 case CMD_SET_COMMAND_BITBLT:
1447 if (count == -1)
1448 {
1449 virge->s3d.src_x = virge->s3d.rsrc_x;
1450 virge->s3d.src_y = virge->s3d.rsrc_y;
1451 virge->s3d.dest_x = virge->s3d.rdest_x;
1452 virge->s3d.dest_y = virge->s3d.rdest_y;
1453 virge->s3d.w = virge->s3d.r_width;
1454 virge->s3d.h = virge->s3d.r_height;
1455 virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff;
1456 virge->s3d.data_left_count = 0;
1458 /* pclog("BitBlt start %i,%i %i,%i %i,%i %02X %x %x\n",
1459 virge->s3d.src_x,
1460 virge->s3d.src_y,
1461 virge->s3d.dest_x,
1462 virge->s3d.dest_y,
1463 virge->s3d.w,
1464 virge->s3d.h,
1465 virge->s3d.rop,
1466 virge->s3d.src_base,
1467 virge->s3d.dest_base);*/
1469 if (virge->s3d.cmd_set & CMD_SET_IDS)
1470 return;
1471 }
1472 if (!virge->s3d.h)
1473 return;
1474 while (count)
1475 {
1476 uint32_t src_addr = virge->s3d.src_base + (virge->s3d.src_x * x_mul) + (virge->s3d.src_y * virge->s3d.src_str);
1477 uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str);
1478 uint32_t source, dest, pattern;
1479 uint32_t out = 0;
1480 int update = 1;
1482 switch (virge->s3d.cmd_set & (CMD_SET_MS | CMD_SET_IDS))
1483 {
1484 case 0:
1485 case CMD_SET_MS:
1486 READ(src_addr, source);
1487 if ((virge->s3d.cmd_set & CMD_SET_TP) && source == virge->s3d.src_fg_clr)
1488 update = 0;
1489 break;
1490 case CMD_SET_IDS:
1491 if (virge->s3d.data_left_count)
1492 {
1493 /*Handle shifting for 24-bit data*/
1494 source = virge->s3d.data_left;
1495 source |= ((cpu_dat << virge->s3d.data_left_count) & ~0xff000000);
1496 cpu_dat >>= (cpu_dat_shift - virge->s3d.data_left_count);
1497 count -= (cpu_dat_shift - virge->s3d.data_left_count);
1498 virge->s3d.data_left_count = 0;
1499 if (count < cpu_dat_shift)
1500 {
1501 virge->s3d.data_left = cpu_dat;
1502 virge->s3d.data_left_count = count;
1503 count = 0;
1504 }
1505 }
1506 else
1507 {
1508 source = cpu_dat;
1509 cpu_dat >>= cpu_dat_shift;
1510 count -= cpu_dat_shift;
1511 if (count < cpu_dat_shift)
1512 {
1513 virge->s3d.data_left = cpu_dat;
1514 virge->s3d.data_left_count = count;
1515 count = 0;
1516 }
1517 }
1518 if ((virge->s3d.cmd_set & CMD_SET_TP) && source == virge->s3d.src_fg_clr)
1519 update = 0;
1520 break;
1521 case CMD_SET_IDS | CMD_SET_MS:
1522 source = (cpu_dat & (1 << 31)) ? virge->s3d.src_fg_clr : virge->s3d.src_bg_clr;
1523 if ((virge->s3d.cmd_set & CMD_SET_TP) && !(cpu_dat & (1 << 31)))
1524 update = 0;
1525 cpu_dat <<= 1;
1526 count--;
1527 break;
1528 }
1530 CLIP(virge->s3d.dest_x, virge->s3d.dest_y);
1532 if (update)
1533 {
1534 READ(dest_addr, dest);
1535 pattern = pattern_data[(virge->s3d.dest_y & 7)*8 + (virge->s3d.dest_x & 7)];
1536 MIX();
1538 WRITE(dest_addr, out);
1539 }
1541 virge->s3d.src_x += x_inc;
1542 virge->s3d.dest_x += x_inc;
1543 if (!virge->s3d.w)
1544 {
1545 virge->s3d.src_x = virge->s3d.rsrc_x;
1546 virge->s3d.dest_x = virge->s3d.rdest_x;
1547 virge->s3d.w = virge->s3d.r_width;
1549 virge->s3d.src_y += y_inc;
1550 virge->s3d.dest_y += y_inc;
1551 virge->s3d.h--;
1553 switch (virge->s3d.cmd_set & (CMD_SET_MS | CMD_SET_IDS))
1554 {
1555 case CMD_SET_IDS:
1556 cpu_dat >>= (count - (count & count_mask));
1557 count &= count_mask;
1558 virge->s3d.data_left_count = 0;
1559 break;
1561 case CMD_SET_IDS | CMD_SET_MS:
1562 cpu_dat <<= (count - (count & count_mask));
1563 count &= count_mask;
1564 break;
1565 }
1566 if (!virge->s3d.h)
1567 {
1568 return;
1569 }
1570 }
1571 else
1572 virge->s3d.w--;
1573 }
1574 break;
1576 case CMD_SET_COMMAND_RECTFILL:
1577 /*No source, pattern = pat_fg_clr*/
1578 if (count == -1)
1579 {
1580 virge->s3d.src_x = virge->s3d.rsrc_x;
1581 virge->s3d.src_y = virge->s3d.rsrc_y;
1582 virge->s3d.dest_x = virge->s3d.rdest_x;
1583 virge->s3d.dest_y = virge->s3d.rdest_y;
1584 virge->s3d.w = virge->s3d.r_width;
1585 virge->s3d.h = virge->s3d.r_height;
1586 virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff;
1588 /* pclog("RctFll start %i,%i %i,%i %02X %08x\n", virge->s3d.dest_x,
1589 virge->s3d.dest_y,
1590 virge->s3d.w,
1591 virge->s3d.h,
1592 virge->s3d.rop, virge->s3d.dest_base);*/
1593 }
1595 while (count)
1596 {
1597 uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str);
1598 uint32_t source = 0, dest, pattern = virge->s3d.pat_fg_clr;
1599 uint32_t out = 0;
1600 int update = 1;
1602 CLIP(virge->s3d.dest_x, virge->s3d.dest_y);
1604 if (update)
1605 {
1606 READ(dest_addr, dest);
1608 MIX();
1610 WRITE(dest_addr, out);
1611 }
1613 virge->s3d.src_x += x_inc;
1614 virge->s3d.dest_x += x_inc;
1615 if (!virge->s3d.w)
1616 {
1617 virge->s3d.src_x = virge->s3d.rsrc_x;
1618 virge->s3d.dest_x = virge->s3d.rdest_x;
1619 virge->s3d.w = virge->s3d.r_width;
1621 virge->s3d.src_y += y_inc;
1622 virge->s3d.dest_y += y_inc;
1623 virge->s3d.h--;
1624 if (!virge->s3d.h)
1625 {
1626 return;
1627 }
1628 }
1629 else
1630 virge->s3d.w--;
1631 count--;
1632 }
1633 break;
1635 case CMD_SET_COMMAND_LINE:
1636 if (count == -1)
1637 {
1638 virge->s3d.dest_x = virge->s3d.lxstart;
1639 virge->s3d.dest_y = virge->s3d.lystart;
1640 virge->s3d.h = virge->s3d.lycnt;
1641 virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff;
1642 if (virge->s3d.ldx >= 0)
1643 virge->s3d.dest_x -= virge->s3d.ldx / 2;
1644 else
1645 virge->s3d.dest_x += virge->s3d.ldx / 2;
1646 //virge->s3d.dest_dest_x = virge->s3d.dest_x + virge->s3d.ldx;
1647 }
1648 while (virge->s3d.h)
1649 {
1650 int x = virge->s3d.dest_x >> 20;
1651 int new_x = (virge->s3d.dest_x + virge->s3d.ldx) >> 20;
1653 do
1654 {
1655 uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str);
1656 uint32_t source = 0, dest, pattern;
1657 uint32_t out = 0;
1658 int update = 1;
1660 CLIP(x, virge->s3d.dest_y);
1662 if (update)
1663 {
1664 READ(dest_addr, dest);
1665 pattern = virge->s3d.pat_fg_clr;
1667 MIX();
1669 WRITE(dest_addr, out);
1670 }
1672 if (x < new_x)
1673 x++;
1674 else if (x > new_x)
1675 x--;
1676 } while (x != new_x);
1678 virge->s3d.dest_x += virge->s3d.ldx;
1679 virge->s3d.dest_y--;
1680 virge->s3d.h--;
1681 }
1682 break;
1684 default:
1685 fatal("s3_virge_bitblt : blit command %i %08x\n", (virge->s3d.cmd_set >> 27) & 0xf, virge->s3d.cmd_set);
1686 }
1687 }
/*
 * RGB15_TO_24(val, r, g, b): unpack an x1r5g5b5 pixel into 8-bit r, g, b.
 * Each 5-bit channel is widened to 8 bits by replicating its top three bits
 * into the low bits, so 0x1f maps to 0xff.
 *
 * `val` is parenthesised so expression arguments are masked as a whole, and
 * the three assignments are wrapped in do/while(0) so the macro behaves as a
 * single statement under an unbraced `if`.  The original definition ended in
 * ';' - that is kept so any call site written without its own semicolon
 * still compiles.
 */
#define RGB15_TO_24(val, r, g, b)                                               \
        do                                                                      \
        {                                                                       \
                (b) = (((val) & 0x001f) << 3) | (((val) & 0x001f) >> 2);        \
                (g) = (((val) & 0x03e0) >> 2) | (((val) & 0x03e0) >> 7);        \
                (r) = (((val) & 0x7c00) >> 7) | (((val) & 0x7c00) >> 12);       \
        } while (0);
/*
 * RGB24_TO_24(val, r, g, b): split a 0xRRGGBB pixel into its three bytes.
 *
 * `val` is parenthesised so expression arguments are masked as a whole, and
 * the assignments are wrapped in do/while(0) so that all three are governed
 * by an enclosing unbraced `if`.  As before, the call site supplies the
 * terminating semicolon.
 */
#define RGB24_TO_24(val, r, g, b)                       \
        do                                              \
        {                                               \
                (b) = (val) & 0xff;                     \
                (g) = ((val) & 0xff00) >> 8;            \
                (r) = ((val) & 0xff0000) >> 16;         \
        } while (0)
/*
 * RGB15(r, g, b, dest): pack 8-bit r, g, b into an x1r5g5b5 value in dest.
 *
 * When virge->dithering_enabled is set, an offset from the 4x4 `dither`
 * table, indexed by the file-scope pixel position (_y & 3, _x & 3), is added
 * to each channel before truncation.  Channels above 248 are set to 248
 * instead, so the sum cannot exceed 255.
 *
 * Arguments are parenthesised so expression arguments are shifted and
 * compared as a whole; do/while(0) makes the macro a single statement.
 * Relies on `virge`, `dither`, `_x` and `_y` from the expansion site.
 */
#define RGB15(r, g, b, dest)                                                            \
        do                                                                              \
        {                                                                               \
                if (virge->dithering_enabled)                                           \
                {                                                                       \
                        int add = dither[_y & 3][_x & 3];                               \
                        int _r = ((r) > 248) ? 248 : (r)+add;                           \
                        int _g = ((g) > 248) ? 248 : (g)+add;                           \
                        int _b = ((b) > 248) ? 248 : (b)+add;                           \
                        (dest) = ((_b >> 3) & 0x1f) | (((_g >> 3) & 0x1f) << 5) | (((_r >> 3) & 0x1f) << 10); \
                }                                                                       \
                else                                                                    \
                        (dest) = (((b) >> 3) & 0x1f) | ((((g) >> 3) & 0x1f) << 5) | ((((r) >> 3) & 0x1f) << 10); \
        } while (0)

/*RGB24(r, g, b): pack three 8-bit channels into 0xRRGGBB*/
#define RGB24(r, g, b) ((b) | ((g) << 8) | ((r) << 16))
/*One colour sample, one int per channel. The texel readers in this file fill
  each channel with a value in the range 0-255.*/
typedef struct rgba_t
{
        int r;  /*red*/
        int g;  /*green*/
        int b;  /*blue*/
        int a;  /*alpha*/
} rgba_t;
1716 typedef struct s3d_state_t
1717 {
1718 int32_t r, g, b, a, u, v, d, w;
1720 int32_t base_r, base_g, base_b, base_a, base_u, base_v, base_d, base_w;
1722 uint32_t base_z;
1724 uint32_t tbu, tbv;
1726 uint32_t cmd_set;
1727 int max_d;
1729 uint16_t *texture[10];
1731 uint32_t tex_bdr_clr;
1733 int32_t x1, x2;
1734 int y;
1736 rgba_t dest_rgba;
1737 } s3d_state_t;
/*Parameters of a single texel fetch, filled in by the tex_sample_* routines
  and consumed by the tex_ARGB* readers*/
typedef struct s3d_texture_state_t
{
        /*Texture level to read; a row of that level is (1 << level) texels*/
        int level;
        /*Right shift that turns the masked u/v into a texel index*/
        int texture_shift;

        /*Texture coordinates of the texel*/
        int32_t u;
        int32_t v;
} s3d_texture_state_t;
1747 static void (*tex_read)(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out);
1748 static void (*tex_sample)(s3d_state_t *state);
1749 static void (*dest_pixel)(s3d_state_t *state);
/*Larger / smaller of two values. Each argument may be evaluated twice, so do
  not pass expressions with side effects.*/
#define MAX(a, b) ((b) < (a) ? (a) : (b))
#define MIN(a, b) ((b) > (a) ? (a) : (b))

/*Position of the pixel being drawn; read by the RGB15 dither lookup*/
static int _x;
static int _y;
1756 static void tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1757 {
1758 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1759 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1760 uint16_t val = state->texture[texture_state->level][offset];
1762 out->r = ((val & 0x7c00) >> 7) | ((val & 0x7000) >> 12);
1763 out->g = ((val & 0x03e0) >> 2) | ((val & 0x0380) >> 7);
1764 out->b = ((val & 0x001f) << 3) | ((val & 0x001c) >> 2);
1765 out->a = (val & 0x8000) ? 0xff : 0;
1766 }
1768 static void tex_ARGB1555_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1769 {
1770 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1771 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1772 uint16_t val = state->texture[texture_state->level][offset];
1774 if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000)
1775 val = state->tex_bdr_clr;
1777 out->r = ((val & 0x7c00) >> 7) | ((val & 0x7000) >> 12);
1778 out->g = ((val & 0x03e0) >> 2) | ((val & 0x0380) >> 7);
1779 out->b = ((val & 0x001f) << 3) | ((val & 0x001c) >> 2);
1780 out->a = (val & 0x8000) ? 0xff : 0;
1781 }
1783 static void tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1784 {
1785 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1786 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1787 uint16_t val = state->texture[texture_state->level][offset];
1789 out->r = ((val & 0x0f00) >> 4) | ((val & 0x0f00) >> 8);
1790 out->g = (val & 0x00f0) | ((val & 0x00f0) >> 4);
1791 out->b = ((val & 0x000f) << 4) | (val & 0x000f);
1792 out->a = ((val & 0xf000) >> 8) | ((val & 0xf000) >> 12);
1793 }
1795 static void tex_ARGB4444_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1796 {
1797 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1798 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1799 uint16_t val = state->texture[texture_state->level][offset];
1801 if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000)
1802 val = state->tex_bdr_clr;
1804 out->r = ((val & 0x0f00) >> 4) | ((val & 0x0f00) >> 8);
1805 out->g = (val & 0x00f0) | ((val & 0x00f0) >> 4);
1806 out->b = ((val & 0x000f) << 4) | (val & 0x000f);
1807 out->a = ((val & 0xf000) >> 8) | ((val & 0xf000) >> 12);
1808 }
1810 static void tex_ARGB8888(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1811 {
1812 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1813 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1814 uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset];
1816 out->r = (val >> 16) & 0xff;
1817 out->g = (val >> 8) & 0xff;
1818 out->b = val & 0xff;
1819 out->a = (val >> 24) & 0xff;
1820 }
1821 static void tex_ARGB8888_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1822 {
1823 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1824 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1825 uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset];
1827 if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000)
1828 val = state->tex_bdr_clr;
1830 out->r = (val >> 16) & 0xff;
1831 out->g = (val >> 8) & 0xff;
1832 out->b = val & 0xff;
1833 out->a = (val >> 24) & 0xff;
1834 }
1836 static void tex_sample_normal(s3d_state_t *state)
1837 {
1838 s3d_texture_state_t texture_state;
1840 texture_state.level = state->max_d;
1841 texture_state.texture_shift = 18 + (9 - texture_state.level);
1842 texture_state.u = state->u + state->tbu;
1843 texture_state.v = state->v + state->tbv;
1845 tex_read(state, &texture_state, &state->dest_rgba);
1846 }
1848 static void tex_sample_normal_filter(s3d_state_t *state)
1849 {
1850 s3d_texture_state_t texture_state;
1851 int tex_offset;
1852 rgba_t tex_samples[4];
1853 int du, dv;
1854 int d[4];
1856 texture_state.level = state->max_d;
1857 texture_state.texture_shift = 18 + (9 - texture_state.level);
1858 tex_offset = 1 << texture_state.texture_shift;
1860 texture_state.u = state->u + state->tbu;
1861 texture_state.v = state->v + state->tbv;
1862 tex_read(state, &texture_state, &tex_samples[0]);
1863 du = (texture_state.u >> (texture_state.texture_shift - 8)) & 0xff;
1864 dv = (texture_state.v >> (texture_state.texture_shift - 8)) & 0xff;
1866 texture_state.u = state->u + state->tbu + tex_offset;
1867 texture_state.v = state->v + state->tbv;
1868 tex_read(state, &texture_state, &tex_samples[1]);
1870 texture_state.u = state->u + state->tbu;
1871 texture_state.v = state->v + state->tbv + tex_offset;
1872 tex_read(state, &texture_state, &tex_samples[2]);
1874 texture_state.u = state->u + state->tbu + tex_offset;
1875 texture_state.v = state->v + state->tbv + tex_offset;
1876 tex_read(state, &texture_state, &tex_samples[3]);
1878 d[0] = (256 - du) * (256 - dv);
1879 d[1] = du * (256 - dv);
1880 d[2] = (256 - du) * dv;
1881 d[3] = du * dv;
1883 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
1884 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
1885 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
1886 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
1887 }
1889 static void tex_sample_mipmap(s3d_state_t *state)
1890 {
1891 s3d_texture_state_t texture_state;
1893 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
1894 if (texture_state.level < 0)
1895 texture_state.level = 0;
1896 texture_state.texture_shift = 18 + (9 - texture_state.level);
1897 texture_state.u = state->u + state->tbu;
1898 texture_state.v = state->v + state->tbv;
1900 tex_read(state, &texture_state, &state->dest_rgba);
1901 }
1903 static void tex_sample_mipmap_filter(s3d_state_t *state)
1904 {
1905 s3d_texture_state_t texture_state;
1906 int tex_offset;
1907 rgba_t tex_samples[4];
1908 int du, dv;
1909 int d[4];
1911 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
1912 if (texture_state.level < 0)
1913 texture_state.level = 0;
1914 texture_state.texture_shift = 18 + (9 - texture_state.level);
1915 tex_offset = 1 << texture_state.texture_shift;
1917 texture_state.u = state->u + state->tbu;
1918 texture_state.v = state->v + state->tbv;
1919 tex_read(state, &texture_state, &tex_samples[0]);
1920 du = (texture_state.u >> (texture_state.texture_shift - 8)) & 0xff;
1921 dv = (texture_state.v >> (texture_state.texture_shift - 8)) & 0xff;
1923 texture_state.u = state->u + state->tbu + tex_offset;
1924 texture_state.v = state->v + state->tbv;
1925 tex_read(state, &texture_state, &tex_samples[1]);
1927 texture_state.u = state->u + state->tbu;
1928 texture_state.v = state->v + state->tbv + tex_offset;
1929 tex_read(state, &texture_state, &tex_samples[2]);
1931 texture_state.u = state->u + state->tbu + tex_offset;
1932 texture_state.v = state->v + state->tbv + tex_offset;
1933 tex_read(state, &texture_state, &tex_samples[3]);
1935 d[0] = (256 - du) * (256 - dv);
1936 d[1] = du * (256 - dv);
1937 d[2] = (256 - du) * dv;
1938 d[3] = du * dv;
1940 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
1941 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
1942 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
1943 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
1944 }
1946 static void tex_sample_persp_normal(s3d_state_t *state)
1947 {
1948 s3d_texture_state_t texture_state;
1949 int32_t w = 0;
1951 if (state->w)
1952 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
1954 texture_state.level = state->max_d;
1955 texture_state.texture_shift = 18 + (9 - texture_state.level);
1956 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
1957 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
1959 tex_read(state, &texture_state, &state->dest_rgba);
1960 }
1962 static void tex_sample_persp_normal_filter(s3d_state_t *state)
1963 {
1964 s3d_texture_state_t texture_state;
1965 int32_t w = 0, u, v;
1966 int tex_offset;
1967 rgba_t tex_samples[4];
1968 int du, dv;
1969 int d[4];
1971 if (state->w)
1972 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
1974 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
1975 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
1977 texture_state.level = state->max_d;
1978 texture_state.texture_shift = 18 + (9 - texture_state.level);
1979 tex_offset = 1 << texture_state.texture_shift;
1981 texture_state.u = u;
1982 texture_state.v = v;
1983 tex_read(state, &texture_state, &tex_samples[0]);
1984 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
1985 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
1987 texture_state.u = u + tex_offset;
1988 texture_state.v = v;
1989 tex_read(state, &texture_state, &tex_samples[1]);
1991 texture_state.u = u;
1992 texture_state.v = v + tex_offset;
1993 tex_read(state, &texture_state, &tex_samples[2]);
1995 texture_state.u = u + tex_offset;
1996 texture_state.v = v + tex_offset;
1997 tex_read(state, &texture_state, &tex_samples[3]);
1999 d[0] = (256 - du) * (256 - dv);
2000 d[1] = du * (256 - dv);
2001 d[2] = (256 - du) * dv;
2002 d[3] = du * dv;
2004 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
2005 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
2006 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
2007 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
2008 }
2010 static void tex_sample_persp_normal_375(s3d_state_t *state)
2011 {
2012 s3d_texture_state_t texture_state;
2013 int32_t w = 0;
2015 if (state->w)
2016 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2018 texture_state.level = state->max_d;
2019 texture_state.texture_shift = 18 + (9 - texture_state.level);
2020 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
2021 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
2023 tex_read(state, &texture_state, &state->dest_rgba);
2024 }
2026 static void tex_sample_persp_normal_filter_375(s3d_state_t *state)
2027 {
2028 s3d_texture_state_t texture_state;
2029 int32_t w = 0, u, v;
2030 int tex_offset;
2031 rgba_t tex_samples[4];
2032 int du, dv;
2033 int d[4];
2035 if (state->w)
2036 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2038 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
2039 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
2041 texture_state.level = state->max_d;
2042 texture_state.texture_shift = 18 + (9 - texture_state.level);
2043 tex_offset = 1 << texture_state.texture_shift;
2045 texture_state.u = u;
2046 texture_state.v = v;
2047 tex_read(state, &texture_state, &tex_samples[0]);
2048 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
2049 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
2051 texture_state.u = u + tex_offset;
2052 texture_state.v = v;
2053 tex_read(state, &texture_state, &tex_samples[1]);
2055 texture_state.u = u;
2056 texture_state.v = v + tex_offset;
2057 tex_read(state, &texture_state, &tex_samples[2]);
2059 texture_state.u = u + tex_offset;
2060 texture_state.v = v + tex_offset;
2061 tex_read(state, &texture_state, &tex_samples[3]);
2063 d[0] = (256 - du) * (256 - dv);
2064 d[1] = du * (256 - dv);
2065 d[2] = (256 - du) * dv;
2066 d[3] = du * dv;
2068 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
2069 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
2070 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
2071 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
2072 }
2075 static void tex_sample_persp_mipmap(s3d_state_t *state)
2076 {
2077 s3d_texture_state_t texture_state;
2078 int32_t w = 0;
2080 if (state->w)
2081 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2083 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
2084 if (texture_state.level < 0)
2085 texture_state.level = 0;
2086 texture_state.texture_shift = 18 + (9 - texture_state.level);
2087 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
2088 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
2090 tex_read(state, &texture_state, &state->dest_rgba);
2091 }
2093 static void tex_sample_persp_mipmap_filter(s3d_state_t *state)
2094 {
2095 s3d_texture_state_t texture_state;
2096 int32_t w = 0, u, v;
2097 int tex_offset;
2098 rgba_t tex_samples[4];
2099 int du, dv;
2100 int d[4];
2102 if (state->w)
2103 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2105 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
2106 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
2108 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
2109 if (texture_state.level < 0)
2110 texture_state.level = 0;
2111 texture_state.texture_shift = 18 + (9 - texture_state.level);
2112 tex_offset = 1 << texture_state.texture_shift;
2114 texture_state.u = u;
2115 texture_state.v = v;
2116 tex_read(state, &texture_state, &tex_samples[0]);
2117 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
2118 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
2120 texture_state.u = u + tex_offset;
2121 texture_state.v = v;
2122 tex_read(state, &texture_state, &tex_samples[1]);
2124 texture_state.u = u;
2125 texture_state.v = v + tex_offset;
2126 tex_read(state, &texture_state, &tex_samples[2]);
2128 texture_state.u = u + tex_offset;
2129 texture_state.v = v + tex_offset;
2130 tex_read(state, &texture_state, &tex_samples[3]);
2132 d[0] = (256 - du) * (256 - dv);
2133 d[1] = du * (256 - dv);
2134 d[2] = (256 - du) * dv;
2135 d[3] = du * dv;
2137 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
2138 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
2139 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
2140 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
2141 }
2143 static void tex_sample_persp_mipmap_375(s3d_state_t *state)
2144 {
2145 s3d_texture_state_t texture_state;
2146 int32_t w = 0;
2148 if (state->w)
2149 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2151 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
2152 if (texture_state.level < 0)
2153 texture_state.level = 0;
2154 texture_state.texture_shift = 18 + (9 - texture_state.level);
2155 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
2156 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
2158 tex_read(state, &texture_state, &state->dest_rgba);
2159 }
2161 static void tex_sample_persp_mipmap_filter_375(s3d_state_t *state)
2162 {
2163 s3d_texture_state_t texture_state;
2164 int32_t w = 0, u, v;
2165 int tex_offset;
2166 rgba_t tex_samples[4];
2167 int du, dv;
2168 int d[4];
2170 if (state->w)
2171 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2173 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
2174 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
2176 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
2177 if (texture_state.level < 0)
2178 texture_state.level = 0;
2179 texture_state.texture_shift = 18 + (9 - texture_state.level);
2180 tex_offset = 1 << texture_state.texture_shift;
2182 texture_state.u = u;
2183 texture_state.v = v;
2184 tex_read(state, &texture_state, &tex_samples[0]);
2185 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
2186 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
2188 texture_state.u = u + tex_offset;
2189 texture_state.v = v;
2190 tex_read(state, &texture_state, &tex_samples[1]);
2192 texture_state.u = u;
2193 texture_state.v = v + tex_offset;
2194 tex_read(state, &texture_state, &tex_samples[2]);
2196 texture_state.u = u + tex_offset;
2197 texture_state.v = v + tex_offset;
2198 tex_read(state, &texture_state, &tex_samples[3]);
2200 d[0] = (256 - du) * (256 - dv);
2201 d[1] = du * (256 - dv);
2202 d[2] = (256 - du) * dv;
2203 d[3] = du * dv;
2205 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
2206 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
2207 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
2208 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
2209 }
/*Saturate an integer lvalue to the range 0..0xff. x is evaluated more than
  once, so it must not have side effects.*/
#define CLAMP(x) do                     \
        {                               \
                if ((x) < 0)            \
                        x = 0;          \
                else if ((x) > 0xff)    \
                        x = 0xff;       \
        }                               \
        while (0)
/*Saturate four integer lvalues to the range 0..0xff.
  Wrapped in do { } while (0) so the macro expands to a single statement and
  can be used safely as the body of an unbraced if/else. Each argument is
  evaluated more than once, so arguments must not have side effects.*/
#define CLAMP_RGBA(r, g, b, a) do                       \
        {                                               \
                if ((r) & ~0xff)                        \
                        (r) = ((r) < 0) ? 0 : 0xff;     \
                if ((g) & ~0xff)                        \
                        (g) = ((g) < 0) ? 0 : 0xff;     \
                if ((b) & ~0xff)                        \
                        (b) = ((b) < 0) ? 0 : 0xff;     \
                if ((a) & ~0xff)                        \
                        (a) = ((a) < 0) ? 0 : 0xff;     \
        }                                               \
        while (0)
/*Saturate three lvalues to the range 0..0xff using plain comparisons.
  Each argument is evaluated more than once, so arguments must not have side
  effects.*/
#define CLAMP_RGB(r, g, b) do                   \
        {                                       \
                if ((r) < 0)                    \
                        r = 0;                  \
                else if ((r) > 0xff)            \
                        r = 0xff;               \
                                                \
                if ((g) < 0)                    \
                        g = 0;                  \
                else if ((g) > 0xff)            \
                        g = 0xff;               \
                                                \
                if ((b) < 0)                    \
                        b = 0;                  \
                else if ((b) > 0xff)            \
                        b = 0xff;               \
        }                                       \
        while (0)
2246 static void dest_pixel_gouraud_shaded_triangle(s3d_state_t *state)
2247 {
2248 state->dest_rgba.r = state->r >> 7;
2249 CLAMP(state->dest_rgba.r);
2251 state->dest_rgba.g = state->g >> 7;
2252 CLAMP(state->dest_rgba.g);
2254 state->dest_rgba.b = state->b >> 7;
2255 CLAMP(state->dest_rgba.b);
2257 state->dest_rgba.a = state->a >> 7;
2258 CLAMP(state->dest_rgba.a);
2259 }
2261 static void dest_pixel_unlit_texture_triangle(s3d_state_t *state)
2262 {
2263 tex_sample(state);
2265 if (state->cmd_set & CMD_SET_ABC_SRC)
2266 state->dest_rgba.a = state->a >> 7;
2267 }
2269 static void dest_pixel_lit_texture_decal(s3d_state_t *state)
2270 {
2271 tex_sample(state);
2273 if (state->cmd_set & CMD_SET_ABC_SRC)
2274 state->dest_rgba.a = state->a >> 7;
2275 }
2277 static void dest_pixel_lit_texture_reflection(s3d_state_t *state)
2278 {
2279 tex_sample(state);
2281 state->dest_rgba.r += (state->r >> 7);
2282 state->dest_rgba.g += (state->g >> 7);
2283 state->dest_rgba.b += (state->b >> 7);
2284 if (state->cmd_set & CMD_SET_ABC_SRC)
2285 state->dest_rgba.a += (state->a >> 7);
2287 CLAMP_RGBA(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b, state->dest_rgba.a);
2288 }
2290 static void dest_pixel_lit_texture_modulate(s3d_state_t *state)
2291 {
2292 int r = state->r >> 7, g = state->g >> 7, b = state->b >> 7, a = state->a >> 7;
2294 tex_sample(state);
2296 CLAMP_RGBA(r, g, b, a);
2298 state->dest_rgba.r = ((state->dest_rgba.r) * r) >> 8;
2299 state->dest_rgba.g = ((state->dest_rgba.g) * g) >> 8;
2300 state->dest_rgba.b = ((state->dest_rgba.b) * b) >> 8;
2302 if (state->cmd_set & CMD_SET_ABC_SRC)
2303 state->dest_rgba.a = a;
2304 }
2306 static void tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int32_t dx2)
2307 {
2308 uint8_t *vram = virge->svga.vram;
2310 int x_dir = s3d_tri->tlr ? 1 : -1;
2312 int use_z = !(s3d_tri->cmd_set & CMD_SET_ZB_MODE);
2314 int y_count = yc;
2316 int bpp = (s3d_tri->cmd_set >> 2) & 7;
2318 uint32_t dest_offset, z_offset;
2320 if (s3d_tri->cmd_set & CMD_SET_HC)
2321 {
2322 if (state->y < s3d_tri->clip_t)
2323 return;
2324 if (state->y > s3d_tri->clip_b)
2325 {
2326 int diff_y = state->y - s3d_tri->clip_b;
2328 if (diff_y > y_count)
2329 diff_y = y_count;
2331 state->base_u += (s3d_tri->TdUdY * diff_y);
2332 state->base_v += (s3d_tri->TdVdY * diff_y);
2333 state->base_z += (s3d_tri->TdZdY * diff_y);
2334 state->base_r += (s3d_tri->TdRdY * diff_y);
2335 state->base_g += (s3d_tri->TdGdY * diff_y);
2336 state->base_b += (s3d_tri->TdBdY * diff_y);
2337 state->base_a += (s3d_tri->TdAdY * diff_y);
2338 state->base_d += (s3d_tri->TdDdY * diff_y);
2339 state->base_w += (s3d_tri->TdWdY * diff_y);
2340 state->x1 += (dx1 * diff_y);
2341 state->x2 += (dx2 * diff_y);
2342 state->y -= diff_y;
2343 dest_offset -= s3d_tri->dest_str;
2344 z_offset -= s3d_tri->z_str;
2345 y_count -= diff_y;
2346 }
2347 if ((state->y - y_count) < s3d_tri->clip_t)
2348 y_count = state->y - s3d_tri->clip_t;
2349 }
2351 dest_offset = s3d_tri->dest_base + (state->y * s3d_tri->dest_str);
2352 z_offset = s3d_tri->z_base + (state->y * s3d_tri->z_str);
2354 for (; y_count > 0; y_count--)
2355 {
2356 int x = (state->x1 + ((1 << 20) - 1)) >> 20;
2357 int xe = (state->x2 + ((1 << 20) - 1)) >> 20;
2358 uint32_t z = (state->base_z > 0) ? (state->base_z << 1) : 0;
2359 if (x_dir < 0)
2360 {
2361 x--;
2362 xe--;
2363 }
2365 if (x != xe && (x_dir > 0 && x < xe) || (x_dir < 0 && x > xe))
2366 {
2367 uint32_t dest_addr, z_addr;
2368 int dx = (x_dir > 0) ? ((31 - ((state->x1-1) >> 15)) & 0x1f) : (((state->x1-1) >> 15) & 0x1f);
2369 int x_offset = x_dir * (bpp + 1);
2370 int xz_offset = x_dir << 1;
2371 if (x_dir > 0)
2372 dx += 1;
2373 state->r = state->base_r + ((s3d_tri->TdRdX * dx) >> 5);
2374 state->g = state->base_g + ((s3d_tri->TdGdX * dx) >> 5);
2375 state->b = state->base_b + ((s3d_tri->TdBdX * dx) >> 5);
2376 state->a = state->base_a + ((s3d_tri->TdAdX * dx) >> 5);
2377 state->u = state->base_u + ((s3d_tri->TdUdX * dx) >> 5);
2378 state->v = state->base_v + ((s3d_tri->TdVdX * dx) >> 5);
2379 state->w = state->base_w + ((s3d_tri->TdWdX * dx) >> 5);
2380 state->d = state->base_d + ((s3d_tri->TdDdX * dx) >> 5);
2381 z += ((s3d_tri->TdZdX * dx) >> 5);
2383 // pclog("Draw Y=%i X=%i to XE=%i %i %08x %08x %08x %08x %08x %08x %08x %08x %i %08x\n", state->y, x, xe, dx, state->x1, state->x2, dx1, virge->s3d.TdWdX, state->u, state->v, virge->s3d.TdUdX, virge->s3d.TdUdY, dx, (virge->s3d.TdUdX * dx) >> 4);
2385 if (s3d_tri->cmd_set & CMD_SET_HC)
2386 {
2387 if (x_dir > 0)
2388 {
2389 if (x > s3d_tri->clip_r)
2390 goto tri_skip_line;
2391 if (xe < s3d_tri->clip_l)
2392 goto tri_skip_line;
2393 if (xe > s3d_tri->clip_r)
2394 xe = s3d_tri->clip_r;
2395 if (x < s3d_tri->clip_l)
2396 {
2397 int diff_x = s3d_tri->clip_l - x;
2399 z += (s3d_tri->TdZdX * diff_x);
2400 state->u += (s3d_tri->TdUdX * diff_x);
2401 state->v += (s3d_tri->TdVdX * diff_x);
2402 state->r += (s3d_tri->TdRdX * diff_x);
2403 state->g += (s3d_tri->TdGdX * diff_x);
2404 state->b += (s3d_tri->TdBdX * diff_x);
2405 state->a += (s3d_tri->TdAdX * diff_x);
2406 state->d += (s3d_tri->TdDdX * diff_x);
2407 state->w += (s3d_tri->TdWdX * diff_x);
2409 x = s3d_tri->clip_l;
2410 }
2411 }
2412 else
2413 {
2414 if (x < s3d_tri->clip_l)
2415 goto tri_skip_line;
2416 if (xe > s3d_tri->clip_r)
2417 goto tri_skip_line;
2418 if (xe < s3d_tri->clip_l)
2419 xe = s3d_tri->clip_l;
2420 if (x > s3d_tri->clip_r)
2421 {
2422 int diff_x = x - s3d_tri->clip_r;
2424 z += (s3d_tri->TdZdX * diff_x);
2425 state->u += (s3d_tri->TdUdX * diff_x);
2426 state->v += (s3d_tri->TdVdX * diff_x);
2427 state->r += (s3d_tri->TdRdX * diff_x);
2428 state->g += (s3d_tri->TdGdX * diff_x);
2429 state->b += (s3d_tri->TdBdX * diff_x);
2430 state->a += (s3d_tri->TdAdX * diff_x);
2431 state->d += (s3d_tri->TdDdX * diff_x);
2432 state->w += (s3d_tri->TdWdX * diff_x);
2434 x = s3d_tri->clip_r;
2435 }
2436 }
2437 }
2439 virge->svga.changedvram[(dest_offset & 0x3fffff) >> 12] = changeframecount;
2441 dest_addr = dest_offset + (x * (bpp + 1));
2442 z_addr = z_offset + (x << 1);
2444 for (; x != xe; x = (x + x_dir) & 0xfff)
2445 {
2446 int update = 1;
2447 uint16_t src_z;
2448 _x = x; _y = state->y;
2450 if (use_z)
2451 {
2452 src_z = Z_READ(z_addr);
2453 Z_CLIP(src_z, z >> 16);
2454 }
2456 if (update)
2457 {
2458 uint32_t dest_col;
2460 dest_pixel(state);
2462 if (s3d_tri->cmd_set & CMD_SET_ABC_ENABLE)
2463 {
2464 uint32_t src_col;
2465 int src_r, src_g, src_b;
2467 switch (bpp)
2468 {
2469 case 0: /*8 bpp*/
2470 /*Not implemented yet*/
2471 break;
2472 case 1: /*16 bpp*/
2473 src_col = *(uint16_t *)&vram[dest_addr & 0x3fffff];
2474 RGB15_TO_24(src_col, src_r, src_g, src_b);
2475 break;
2476 case 2: /*24 bpp*/
2477 src_col = (*(uint32_t *)&vram[dest_addr & 0x3fffff]) & 0xffffff;
2478 RGB24_TO_24(src_col, src_r, src_g, src_b);
2479 break;
2480 }
2482 state->dest_rgba.r = ((state->dest_rgba.r * state->dest_rgba.a) + (src_r * (255 - state->dest_rgba.a))) / 255;
2483 state->dest_rgba.g = ((state->dest_rgba.g * state->dest_rgba.a) + (src_g * (255 - state->dest_rgba.a))) / 255;
2484 state->dest_rgba.b = ((state->dest_rgba.b * state->dest_rgba.a) + (src_b * (255 - state->dest_rgba.a))) / 255;
2485 }
2487 switch (bpp)
2488 {
2489 case 0: /*8 bpp*/
2490 /*Not implemented yet*/
2491 break;
2492 case 1: /*16 bpp*/
2493 RGB15(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b, dest_col);
2494 *(uint16_t *)&vram[dest_addr] = dest_col;
2495 break;
2496 case 2: /*24 bpp*/
2497 dest_col = RGB24(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b);
2498 *(uint8_t *)&vram[dest_addr] = dest_col & 0xff;
2499 *(uint8_t *)&vram[dest_addr + 1] = (dest_col >> 8) & 0xff;
2500 *(uint8_t *)&vram[dest_addr + 2] = (dest_col >> 16) & 0xff;
2501 break;
2502 }
2504 if (use_z && (s3d_tri->cmd_set & CMD_SET_ZUP))
2505 Z_WRITE(z_addr, src_z);
2506 }
2508 z += s3d_tri->TdZdX;
2509 state->u += s3d_tri->TdUdX;
2510 state->v += s3d_tri->TdVdX;
2511 state->r += s3d_tri->TdRdX;
2512 state->g += s3d_tri->TdGdX;
2513 state->b += s3d_tri->TdBdX;
2514 state->a += s3d_tri->TdAdX;
2515 state->d += s3d_tri->TdDdX;
2516 state->w += s3d_tri->TdWdX;
2517 dest_addr += x_offset;
2518 z_addr += xz_offset;
2519 virge->pixel_count++;
2520 }
2521 }
2522 tri_skip_line:
2523 state->x1 += dx1;
2524 state->x2 += dx2;
2525 state->base_u += s3d_tri->TdUdY;
2526 state->base_v += s3d_tri->TdVdY;
2527 state->base_z += s3d_tri->TdZdY;
2528 state->base_r += s3d_tri->TdRdY;
2529 state->base_g += s3d_tri->TdGdY;
2530 state->base_b += s3d_tri->TdBdY;
2531 state->base_a += s3d_tri->TdAdY;
2532 state->base_d += s3d_tri->TdDdY;
2533 state->base_w += s3d_tri->TdWdY;
2534 state->y--;
2535 dest_offset -= s3d_tri->dest_str;
2536 z_offset -= s3d_tri->z_str;
2537 }
2538 }
/*Per-format size factor, indexed by bits 5-7 of cmd_set. s3_virge_triangle
  multiplies it by the texel count of a mip level and halves the product to
  step tex_base to the next level.*/
static int tex_size[8] =
{
        8,      /*0*/
        4,      /*1*/
        4,      /*2*/
        2,      /*3*/
        2,      /*4*/
        2,      /*5*/
        2,      /*6*/
        2       /*7*/
};
2552 static void s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri)
2553 {
2554 s3d_state_t state;
2556 uint32_t tex_base;
2557 int c;
2559 uint64_t start_time = timer_read();
2560 uint64_t end_time;
2562 state.tbu = s3d_tri->tbu << 11;
2563 state.tbv = s3d_tri->tbv << 11;
2565 state.max_d = (s3d_tri->cmd_set >> 8) & 15;
2567 state.tex_bdr_clr = s3d_tri->tex_bdr_clr;
2569 state.cmd_set = s3d_tri->cmd_set;
2571 state.base_u = s3d_tri->tus;
2572 state.base_v = s3d_tri->tvs;
2573 state.base_z = s3d_tri->tzs;
2574 state.base_r = (int32_t)s3d_tri->trs;
2575 state.base_g = (int32_t)s3d_tri->tgs;
2576 state.base_b = (int32_t)s3d_tri->tbs;
2577 state.base_a = (int32_t)s3d_tri->tas;
2578 state.base_d = s3d_tri->tds;
2579 state.base_w = s3d_tri->tws;
2581 tex_base = s3d_tri->tex_base;
2582 for (c = 9; c >= 0; c--)
2583 {
2584 state.texture[c] = (uint16_t *)&virge->svga.vram[tex_base];
2585 if (c <= state.max_d)
2586 tex_base += ((1 << (c*2)) * tex_size[(s3d_tri->cmd_set >> 5) & 7]) / 2;
2587 }
2589 switch ((s3d_tri->cmd_set >> 27) & 0xf)
2590 {
2591 case 0:
2592 dest_pixel = dest_pixel_gouraud_shaded_triangle;
2593 // pclog("dest_pixel_gouraud_shaded_triangle\n");
2594 break;
2595 case 1:
2596 case 5:
2597 switch ((s3d_tri->cmd_set >> 15) & 0x3)
2598 {
2599 case 0:
2600 dest_pixel = dest_pixel_lit_texture_reflection;
2601 // pclog("dest_pixel_lit_texture_reflection\n");
2602 break;
2603 case 1:
2604 dest_pixel = dest_pixel_lit_texture_modulate;
2605 // pclog("dest_pixel_lit_texture_modulate\n");
2606 break;
2607 case 2:
2608 dest_pixel = dest_pixel_lit_texture_decal;
2609 // pclog("dest_pixel_lit_texture_decal\n");
2610 break;
2611 default:
2612 pclog("bad triangle type %x\n", (s3d_tri->cmd_set >> 27) & 0xf);
2613 return;
2614 }
2615 break;
2616 case 2:
2617 case 6:
2618 dest_pixel = dest_pixel_unlit_texture_triangle;
2619 // pclog("dest_pixel_unlit_texture_triangle\n");
2620 break;
2621 default:
2622 pclog("bad triangle type %x\n", (s3d_tri->cmd_set >> 27) & 0xf);
2623 return;
2624 }
2626 switch (((s3d_tri->cmd_set >> 12) & 7) | ((s3d_tri->cmd_set & (1 << 29)) ? 8 : 0))
2627 {
2628 case 0: case 1:
2629 tex_sample = tex_sample_mipmap;
2630 // pclog("use tex_sample_mipmap\n");
2631 break;
2632 case 2: case 3:
2633 tex_sample = virge->bilinear_enabled ? tex_sample_mipmap_filter : tex_sample_mipmap;
2634 // pclog("use tex_sample_mipmap_filter\n");
2635 break;
2636 case 4: case 5:
2637 tex_sample = tex_sample_normal;
2638 // pclog("use tex_sample_normal\n");
2639 break;
2640 case 6: case 7:
2641 tex_sample = virge->bilinear_enabled ? tex_sample_normal_filter : tex_sample_normal;
2642 // pclog("use tex_sample_normal_filter\n");
2643 break;
2644 case (0 | 8): case (1 | 8):
2645 if (virge->is_375)
2646 tex_sample = tex_sample_persp_mipmap_375;
2647 else
2648 tex_sample = tex_sample_persp_mipmap;
2649 // pclog("use tex_sample_persp_mipmap\n");
2650 break;
2651 case (2 | 8): case (3 | 8):
2652 if (virge->is_375)
2653 tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter_375 : tex_sample_persp_mipmap_375;
2654 else
2655 tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter : tex_sample_persp_mipmap;
2656 // pclog("use tex_sample_persp_mipmap_filter\n");
2657 break;
2658 case (4 | 8): case (5 | 8):
2659 if (virge->is_375)
2660 tex_sample = tex_sample_persp_normal_375;
2661 else
2662 tex_sample = tex_sample_persp_normal;
2663 // pclog("use tex_sample_persp_normal\n");
2664 break;
2665 case (6 | 8): case (7 | 8):
2666 if (virge->is_375)
2667 tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter_375 : tex_sample_persp_normal_375;
2668 else
2669 tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter : tex_sample_persp_normal;
2670 // pclog("use tex_sample_persp_normal_filter\n");
2671 break;
2672 }
2674 switch ((s3d_tri->cmd_set >> 5) & 7)
2675 {
2676 case 0:
2677 tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB8888 : tex_ARGB8888_nowrap;
2678 break;
2679 case 1:
2680 tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB4444 : tex_ARGB4444_nowrap;
2681 // pclog("tex_ARGB4444\n");
2682 break;
2683 case 2:
2684 tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap;
2685 // pclog("tex_ARGB1555 %i\n", (s3d_tri->cmd_set >> 5) & 7);
2686 break;
2687 default:
2688 pclog("bad texture type %i\n", (s3d_tri->cmd_set >> 5) & 7);
2689 tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap;
2690 break;
2691 }
2693 // pclog("Triangle %i %i,%i to %i,%i %08x\n", y, x1 >> 20, y, s3d_tri->txend01 >> 20, y - (s3d_tri->ty01 + s3d_tri->ty12), state.cmd_set);
2695 state.y = s3d_tri->tys;
2696 state.x1 = s3d_tri->txs;
2697 state.x2 = s3d_tri->txend01;
2698 tri(virge, s3d_tri, &state, s3d_tri->ty01, s3d_tri->TdXdY02, s3d_tri->TdXdY01);
2699 state.x2 = s3d_tri->txend12;
2700 tri(virge, s3d_tri, &state, s3d_tri->ty12, s3d_tri->TdXdY02, s3d_tri->TdXdY12);
2702 virge->tri_count++;
2704 end_time = timer_read();
2706 virge_time += end_time - start_time;
2707 }
2709 static void render_thread(void *param)
2710 {
2711 virge_t *virge = (virge_t *)param;
2713 while (1)
2714 {
2715 thread_wait_event(virge->wake_render_thread, -1);
2716 thread_reset_event(virge->wake_render_thread);
2717 virge->s3d_busy = 1;
2718 while (!RB_EMPTY)
2719 {
2720 s3_virge_triangle(virge, &virge->s3d_buffer[virge->s3d_read_idx & RB_MASK]);
2721 virge->s3d_read_idx++;
2723 if (RB_ENTRIES == RB_SIZE - 1)
2724 thread_set_event(virge->not_full_event);
2725 }
2726 virge->s3d_busy = 0;
2727 }
2728 }
2730 static void queue_triangle(virge_t *virge)
2731 {
2732 int c;
2733 // pclog("queue_triangle: read=%i write=%i RB_ENTRIES=%i RB_FULL=%i\n", virge->s3d_read_idx, virge->s3d_write_idx, RB_ENTRIES, RB_FULL);
2734 if (RB_FULL)
2735 {
2736 thread_reset_event(virge->not_full_event);
2737 if (RB_FULL)
2738 thread_wait_event(virge->not_full_event, -1); /*Wait for room in ringbuffer*/
2739 }
2740 // pclog(" add at read=%i write=%i %i\n", virge->s3d_read_idx, virge->s3d_write_idx, virge->s3d_write_idx & RB_MASK);
2741 virge->s3d_buffer[virge->s3d_write_idx & RB_MASK] = virge->s3d_tri;
2742 virge->s3d_write_idx++;
2743 if (!virge->s3d_busy)
2744 thread_set_event(virge->wake_render_thread); /*Wake up render thread if moving from idle*/
2745 }
2747 static void s3_virge_hwcursor_draw(svga_t *svga, int displine)
2748 {
2749 virge_t *virge = (virge_t *)svga->p;
2750 int x;
2751 uint16_t dat[2];
2752 int xx;
2753 int offset = svga->hwcursor_latch.x - svga->hwcursor_latch.xoff;
2755 // pclog("HWcursor %i %i\n", svga->hwcursor_latch.x, svga->hwcursor_latch.y);
2756 for (x = 0; x < 64; x += 16)
2757 {
2758 dat[0] = (svga->vram[svga->hwcursor_latch.addr] << 8) | svga->vram[svga->hwcursor_latch.addr + 1];
2759 dat[1] = (svga->vram[svga->hwcursor_latch.addr + 2] << 8) | svga->vram[svga->hwcursor_latch.addr + 3];
2760 if (svga->crtc[0x55] & 0x10)
2761 {
2762 /*X11*/
2763 for (xx = 0; xx < 16; xx++)
2764 {
2765 if (offset >= svga->hwcursor_latch.x)
2766 {
2767 if (dat[0] & 0x8000)
2768 ((uint32_t *)buffer32->line[displine])[offset + 32] = virge->hwcursor_col[dat[1] >> 15];
2769 }
2771 offset++;
2772 dat[0] <<= 1;
2773 dat[1] <<= 1;
2774 }
2775 }
2776 else
2777 {
2778 /*Windows*/
2779 for (xx = 0; xx < 16; xx++)
2780 {
2781 if (offset >= svga->hwcursor_latch.x)
2782 {
2783 if (!(dat[0] & 0x8000))
2784 ((uint32_t *)buffer32->line[displine])[offset + 32] = virge->hwcursor_col[dat[1] >> 15];
2785 else if (dat[1] & 0x8000)
2786 ((uint32_t *)buffer32->line[displine])[offset + 32] ^= 0xffffff;
2787 // pclog("Plot %i, %i (%i %i) %04X %04X\n", offset, displine, x+xx, svga->hwcursor_on, dat[0], dat[1]);
2788 }
2790 offset++;
2791 dat[0] <<= 1;
2792 dat[1] <<= 1;
2793 }
2794 }
2795 svga->hwcursor_latch.addr += 4;
2796 }
2797 }
/*Decode 8 bytes at src into four pixels.
  Each 4-byte group holds Y1, Cr, Y2, Cb; both pixels of a group share the
  chroma pair. Results are written to r[]/g[]/b[] starting at x_write and
  saturated to 0..255. src advances by 8 bytes and x_write by 4 (modulo 8).
  Expects src, r, g, b and x_write to be in scope at the point of use.*/
#define DECODE_YCbCr()                                                  \
        do                                                              \
        {                                                               \
                int pair;                                               \
                                                                        \
                for (pair = 0; pair < 2; pair++)                        \
                {                                                       \
                        uint8_t luma[2];                                \
                        int8_t Cr, Cb;                                  \
                        int dR, dG, dB;                                 \
                        int px;                                         \
                                                                        \
                        luma[0] = src[0];                               \
                        Cr = src[1] - 0x80;                             \
                        luma[1] = src[2];                               \
                        Cb = src[3] - 0x80;                             \
                        src += 4;                                       \
                                                                        \
                        dR = (359*Cr) >> 8;                             \
                        dG = (88*Cb + 183*Cr) >> 8;                     \
                        dB = (453*Cb) >> 8;                             \
                                                                        \
                        for (px = 0; px < 2; px++)                      \
                        {                                               \
                                r[x_write + px] = luma[px] + dR;        \
                                CLAMP(r[x_write + px]);                 \
                                g[x_write + px] = luma[px] - dG;        \
                                CLAMP(g[x_write + px]);                 \
                                b[x_write + px] = luma[px] + dB;        \
                                CLAMP(b[x_write + px]);                 \
                        }                                               \
                                                                        \
                        x_write = (x_write + 2) & 7;                    \
                }                                                       \
        } while (0)
/*Both YUV formats are untested*/
/*Decode 6 bytes at src (U, Y1, Y2, V, Y3, Y4) into four pixels that share
  one chroma pair. Results are written to r[]/g[]/b[] starting at x_write and
  saturated to 0..255. src advances by 6 bytes and x_write by 4 (modulo 8).
  Expects src, r, g, b and x_write to be in scope at the point of use.

  Luma is kept in int so that the scaled value (which can exceed 255 or go
  negative) is clamped rather than wrapped, and every pixel uses its own luma
  sample. The coefficients are the 8-bit fixed-point BT.601 values
  (1.164, 1.596, 0.391, 0.813, 2.018).*/
#define DECODE_YUV211()                                         \
        do                                                      \
        {                                                       \
                int luma[4];                                    \
                int8_t U, V;                                    \
                int dR, dG, dB;                                 \
                int px;                                         \
                                                                \
                U = src[0] - 0x80;                              \
                luma[0] = (298 * (src[1] - 16)) >> 8;           \
                luma[1] = (298 * (src[2] - 16)) >> 8;           \
                V = src[3] - 0x80;                              \
                luma[2] = (298 * (src[4] - 16)) >> 8;           \
                luma[3] = (298 * (src[5] - 16)) >> 8;           \
                src += 6;                                       \
                                                                \
                dR = (409*V) >> 8;                              \
                dG = (100*U + 208*V) >> 8;                      \
                dB = (516*U) >> 8;                              \
                                                                \
                for (px = 0; px < 4; px++)                      \
                {                                               \
                        r[x_write + px] = luma[px] + dR;        \
                        CLAMP(r[x_write + px]);                 \
                        g[x_write + px] = luma[px] - dG;        \
                        CLAMP(g[x_write + px]);                 \
                        b[x_write + px] = luma[px] + dB;        \
                        CLAMP(b[x_write + px]);                 \
                }                                               \
                                                                \
                x_write = (x_write + 4) & 7;                    \
        } while (0)
/*Decode 8 bytes at src into four pixels.
  Each 4-byte group holds U, Y1, V, Y2; both pixels of a group share the
  chroma pair. Results are written to r[]/g[]/b[] starting at x_write and
  saturated to 0..255. src advances by 8 bytes and x_write by 4 (modulo 8).
  Expects src, r, g, b and x_write to be in scope at the point of use.

  Luma is kept in int so that the scaled value (which can exceed 255 or go
  negative) is clamped rather than wrapped. The coefficients are the 8-bit
  fixed-point BT.601 values (1.164, 1.596, 0.391, 0.813, 2.018).*/
#define DECODE_YUV422()                                                 \
        do                                                              \
        {                                                               \
                int pair;                                               \
                                                                        \
                for (pair = 0; pair < 2; pair++)                        \
                {                                                       \
                        int luma[2];                                    \
                        int8_t U, V;                                    \
                        int dR, dG, dB;                                 \
                        int px;                                         \
                                                                        \
                        U = src[0] - 0x80;                              \
                        luma[0] = (298 * (src[1] - 16)) >> 8;           \
                        V = src[2] - 0x80;                              \
                        luma[1] = (298 * (src[3] - 16)) >> 8;           \
                        src += 4;                                       \
                                                                        \
                        dR = (409*V) >> 8;                              \
                        dG = (100*U + 208*V) >> 8;                      \
                        dB = (516*U) >> 8;                              \
                                                                        \
                        for (px = 0; px < 2; px++)                      \
                        {                                               \
                                r[x_write + px] = luma[px] + dR;        \
                                CLAMP(r[x_write + px]);                 \
                                g[x_write + px] = luma[px] - dG;        \
                                CLAMP(g[x_write + px]);                 \
                                b[x_write + px] = luma[px] + dB;        \
                                CLAMP(b[x_write + px]);                 \
                        }                                               \
                                                                        \
                        x_write = (x_write + 2) & 7;                    \
                }                                                       \
        } while (0)
/*Decode four 16-bit 5:5:5 pixels at src into r[]/g[]/b[] starting at
  x_write, expanding each 5-bit field to 8 bits by replicating its top bits.
  src advances by 8 bytes and x_write by 4 (modulo 8).
  Expects src, r, g, b and x_write to be in scope at the point of use.

  Each pixel is assembled from two bytes in little-endian order, so the
  result does not depend on host byte order or on src being 2-byte aligned.*/
#define DECODE_RGB555()                                                                 \
        do                                                                              \
        {                                                                               \
                int c;                                                                  \
                                                                                        \
                for (c = 0; c < 4; c++)                                                 \
                {                                                                       \
                        uint16_t dat;                                                   \
                                                                                        \
                        dat = (uint16_t)(src[0] | (src[1] << 8));                       \
                        src += 2;                                                       \
                                                                                        \
                        r[x_write + c] = ((dat & 0x001f) << 3) | ((dat & 0x001f) >> 2); \
                        g[x_write + c] = ((dat & 0x03e0) >> 2) | ((dat & 0x03e0) >> 7); \
                        b[x_write + c] = ((dat & 0x7c00) >> 7) | ((dat & 0x7c00) >> 12); \
                }                                                                       \
                x_write = (x_write + 4) & 7;                                            \
        } while (0)
/*Decode four 16-bit 5:6:5 pixels at src into r[]/g[]/b[] starting at
  x_write, expanding each field to 8 bits by replicating its top bits.
  src advances by 8 bytes and x_write by 4 (modulo 8).
  Expects src, r, g, b and x_write to be in scope at the point of use.

  Each pixel is assembled from two bytes in little-endian order, so the
  result does not depend on host byte order or on src being 2-byte aligned.*/
#define DECODE_RGB565()                                                                 \
        do                                                                              \
        {                                                                               \
                int c;                                                                  \
                                                                                        \
                for (c = 0; c < 4; c++)                                                 \
                {                                                                       \
                        uint16_t dat;                                                   \
                                                                                        \
                        dat = (uint16_t)(src[0] | (src[1] << 8));                       \
                        src += 2;                                                       \
                                                                                        \
                        r[x_write + c] = ((dat & 0x001f) << 3) | ((dat & 0x001f) >> 2); \
                        g[x_write + c] = ((dat & 0x07e0) >> 3) | ((dat & 0x07e0) >> 9); \
                        b[x_write + c] = ((dat & 0xf800) >> 8) | ((dat & 0xf800) >> 13); \
                }                                                                       \
                x_write = (x_write + 4) & 7;                                            \
        } while (0)
/*Decode four packed 3-byte pixels at src into r[]/g[]/b[] starting at
  x_write; byte 0 goes to r, byte 1 to g, byte 2 to b. src advances by 12
  bytes and x_write by 4 (modulo 8).
  Expects src, r, g, b and x_write to be in scope at the point of use.*/
#define DECODE_RGB888()                                         \
        do                                                      \
        {                                                       \
                int px;                                         \
                                                                \
                for (px = 0; px < 4; px++)                      \
                {                                               \
                        r[x_write + px] = src[px * 3 + 0];      \
                        g[x_write + px] = src[px * 3 + 1];      \
                        b[x_write + px] = src[px * 3 + 2];      \
                }                                               \
                src += 4 * 3;                                   \
                x_write = (x_write + 4) & 7;                    \
        } while (0)
/*Decode four 4-byte pixels at src into r[]/g[]/b[] starting at x_write;
  byte 0 goes to r, byte 1 to g, byte 2 to b and byte 3 is ignored. src
  advances by 16 bytes and x_write by 4 (modulo 8).
  Expects src, r, g, b and x_write to be in scope at the point of use.*/
#define DECODE_XRGB8888()                                       \
        do                                                      \
        {                                                       \
                int px;                                         \
                                                                \
                for (px = 0; px < 4; px++)                      \
                {                                               \
                        r[x_write + px] = src[px * 4 + 0];      \
                        g[x_write + px] = src[px * 4 + 1];      \
                        b[x_write + px] = src[px * 4 + 2];      \
                }                                               \
                src += 4 * 4;                                   \
                x_write = (x_write + 4) & 7;                    \
        } while (0)
/*Decode the next group of four overlay pixels using the decoder selected by
  virge->streams.sdif. Any value without its own case (including 0 and 7)
  is decoded as XRGB8888.
  Expects virge, src, r, g, b and x_write to be in scope at the point of
  use.*/
#define OVERLAY_SAMPLE()                        \
        do                                      \
        {                                       \
                switch (virge->streams.sdif)    \
                {                               \
                        /*YUV family*/          \
                        case 1:                 \
                        DECODE_YCbCr();         \
                        break;                  \
                        case 2:                 \
                        DECODE_YUV422();        \
                        break;                  \
                        case 4:                 \
                        DECODE_YUV211();        \
                        break;                  \
                                                \
                        /*RGB family*/          \
                        case 3:                 \
                        DECODE_RGB555();        \
                        break;                  \
                        case 5:                 \
                        DECODE_RGB565();        \
                        break;                  \
                        case 6:                 \
                        DECODE_RGB888();        \
                        break;                  \
                        default:                \
                        DECODE_XRGB8888();      \
                        break;                  \
                }                               \
        } while (0)
3026 static void s3_virge_overlay_draw(svga_t *svga, int displine)
3027 {
3028 virge_t *virge = (virge_t *)svga->p;
3029 int offset = (virge->streams.sec_x - virge->streams.pri_x) + 1;
3030 int h_acc = virge->streams.dda_horiz_accumulator;
3031 int r[8], g[8], b[8];
3032 int r_samp[8] = {0, 0, 0, 0, 0, 0, 0, 0};
3033 int g_samp[8] = {0, 0, 0, 0, 0, 0, 0, 0};
3034 int b_samp[8] = {0, 0, 0, 0, 0, 0, 0, 0};
3035 int x_size, x_read = 4, x_write = 4;
3036 int x;
3037 uint32_t *p;
3038 uint8_t *src = &svga->vram[svga->overlay_latch.addr];
3040 p = &((uint32_t *)buffer32->line[displine])[offset + 32];
3042 if ((offset + virge->streams.sec_w) > virge->streams.pri_w)
3043 x_size = (virge->streams.pri_w - virge->streams.sec_x) + 1;
3044 else
3045 x_size = virge->streams.sec_w + 1;
3047 OVERLAY_SAMPLE();
3049 for (x = 0; x < x_size; x++)
3050 {
3051 *p++ = r[x_read] | (g[x_read] << 8) | (b[x_read] << 16);
3053 h_acc += virge->streams.k1_horiz_scale;
3054 if (h_acc >= 0)
3055 {
3056 if ((x_read ^ (x_read + 1)) & ~3)
3057 OVERLAY_SAMPLE();
3058 x_read = (x_read + 1) & 7;
3060 h_acc += (virge->streams.k2_horiz_scale - virge->streams.k1_horiz_scale);
3061 }
3062 }
3064 svga->overlay_latch.v_acc += virge->streams.k1_vert_scale;
3065 if (svga->overlay_latch.v_acc >= 0)
3066 {
3067 svga->overlay_latch.v_acc += (virge->streams.k2_vert_scale - virge->streams.k1_vert_scale);
3068 svga->overlay_latch.addr += virge->streams.sec_stride;
3069 }
3070 }
3072 static uint8_t s3_virge_pci_read(int func, int addr, void *p)
3073 {
3074 virge_t *virge = (virge_t *)p;
3075 svga_t *svga = &virge->svga;
3076 uint8_t ret = 0;
3077 // pclog("S3 PCI read %08X ", addr);
3078 switch (addr)
3079 {
3080 case 0x00: ret = 0x33; break; /*'S3'*/
3081 case 0x01: ret = 0x53; break;
3083 case 0x02: ret = virge->virge_id_low; break;
3084 case 0x03: ret = virge->virge_id_high; break;
3086 case 0x04: ret = virge->pci_regs[0x04] & 0x27; break;
3088 case 0x07: ret = virge->pci_regs[0x07] & 0x36; break;
3090 case 0x08: ret = 0; break; /*Revision ID*/
3091 case 0x09: ret = 0; break; /*Programming interface*/
3093 case 0x0a: ret = 0x00; break; /*Supports VGA interface*/
3094 case 0x0b: ret = 0x03; /*output = 3; */break;
3096 case 0x0d: ret = virge->pci_regs[0x0d] & 0xf8; break;
3098 case 0x10: ret = 0x00; break;/*Linear frame buffer address*/
3099 case 0x11: ret = 0x00; break;
3100 case 0x12: ret = 0x00; break;
3101 case 0x13: ret = svga->crtc[0x59] & 0xfc; break;
3103 case 0x30: ret = virge->pci_regs[0x30] & 0x01; break; /*BIOS ROM address*/
3104 case 0x31: ret = 0x00; break;
3105 case 0x32: ret = virge->pci_regs[0x32]; break;
3106 case 0x33: ret = virge->pci_regs[0x33]; break;
3108 case 0x3c: ret = virge->pci_regs[0x3c]; break;
3110 case 0x3d: ret = 0x01; break; /*INTA*/
3112 case 0x3e: ret = 0x04; break;
3113 case 0x3f: ret = 0xff; break;
3115 }
3116 // pclog("%02X\n", ret);
3117 return ret;
3118 }
3120 static void s3_virge_pci_write(int func, int addr, uint8_t val, void *p)
3121 {
3122 virge_t *virge = (virge_t *)p;
3123 svga_t *svga = &virge->svga;
3124 // pclog("S3 PCI write %08X %02X %04X:%08X\n", addr, val, CS, pc);
3125 switch (addr)
3126 {
3127 case 0x00: case 0x01: case 0x02: case 0x03:
3128 case 0x08: case 0x09: case 0x0a: case 0x0b:
3129 case 0x3d: case 0x3e: case 0x3f:
3130 return;
3132 case PCI_REG_COMMAND:
3133 if (val & PCI_COMMAND_IO)
3134 {
3135 io_removehandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3136 io_sethandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3137 }
3138 else
3139 io_removehandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3140 virge->pci_regs[PCI_REG_COMMAND] = val & 0x27;
3141 return;
3142 case 0x07:
3143 virge->pci_regs[0x07] = val & 0x3e;
3144 return;
3145 case 0x0d:
3146 virge->pci_regs[0x0d] = val & 0xf8;
3147 return;
3149 case 0x13:
3150 svga->crtc[0x59] = val & 0xfc;
3151 s3_virge_updatemapping(virge);
3152 return;
3154 case 0x30: case 0x32: case 0x33:
3155 virge->pci_regs[addr] = val;
3156 if (virge->pci_regs[0x30] & 0x01)
3157 {
3158 uint32_t addr = (virge->pci_regs[0x32] << 16) | (virge->pci_regs[0x33] << 24);
3159 // pclog("Virge bios_rom enabled at %08x\n", addr);
3160 mem_mapping_set_addr(&virge->bios_rom.mapping, addr, 0x8000);
3161 mem_mapping_enable(&virge->bios_rom.mapping);
3162 }
3163 else
3164 {
3165 // pclog("Virge bios_rom disabled\n");
3166 mem_mapping_disable(&virge->bios_rom.mapping);
3167 }
3168 return;
3169 case 0x3c:
3170 virge->pci_regs[0x3c] = val;
3171 return;
3172 }
3173 }
3175 static void *s3_virge_init()
3176 {
3177 virge_t *virge = malloc(sizeof(virge_t));
3178 memset(virge, 0, sizeof(virge_t));
3180 virge->bilinear_enabled = device_get_config_int("bilinear");
3181 virge->dithering_enabled = device_get_config_int("dithering");
3182 virge->memory_size = device_get_config_int("memory");
3184 svga_init(&virge->svga, virge, virge->memory_size << 20,
3185 s3_virge_recalctimings,
3186 s3_virge_in, s3_virge_out,
3187 s3_virge_hwcursor_draw,
3188 s3_virge_overlay_draw);
3190 rom_init(&virge->bios_rom, "roms/s3virge.bin", 0xc0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL);
3191 if (PCI)
3192 mem_mapping_disable(&virge->bios_rom.mapping);
3194 mem_mapping_add(&virge->mmio_mapping, 0, 0, s3_virge_mmio_read,
3195 s3_virge_mmio_read_w,
3196 s3_virge_mmio_read_l,
3197 s3_virge_mmio_write,
3198 s3_virge_mmio_write_w,
3199 s3_virge_mmio_write_l,
3200 NULL,
3201 0,
3202 virge);
3203 mem_mapping_add(&virge->new_mmio_mapping, 0, 0, s3_virge_mmio_read,
3204 s3_virge_mmio_read_w,
3205 s3_virge_mmio_read_l,
3206 s3_virge_mmio_write,
3207 s3_virge_mmio_write_w,
3208 s3_virge_mmio_write_l,
3209 NULL,
3210 0,
3211 virge);
3212 mem_mapping_add(&virge->linear_mapping, 0, 0, svga_read_linear,
3213 svga_readw_linear,
3214 svga_readl_linear,
3215 svga_write_linear,
3216 svga_writew_linear,
3217 svga_writel_linear,
3218 NULL,
3219 0,
3220 &virge->svga);
3222 io_sethandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3224 virge->pci_regs[4] = 3;
3225 virge->pci_regs[5] = 0;
3226 virge->pci_regs[6] = 0;
3227 virge->pci_regs[7] = 2;
3228 virge->pci_regs[0x32] = 0x0c;
3229 virge->pci_regs[0x3d] = 1;
3230 virge->pci_regs[0x3e] = 4;
3231 virge->pci_regs[0x3f] = 0xff;
3233 virge->virge_id_high = 0x56;
3234 virge->virge_id_low = 0x31;
3235 virge->virge_rev = 0;
3236 virge->virge_id = 0xe1;
3238 switch (virge->memory_size)
3239 {
3240 case 2:
3241 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (4 << 5);
3242 break;
3243 case 4:
3244 default:
3245 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (0 << 5);
3246 break;
3247 }
3249 virge->svga.crtc[0x37] = 1;// | (7 << 5);
3250 virge->svga.crtc[0x53] = 1 << 3;
3251 virge->svga.crtc[0x59] = 0x70;
3253 virge->is_375 = 0;
3255 pci_add(s3_virge_pci_read, s3_virge_pci_write, virge);
3257 virge->wake_render_thread = thread_create_event();
3258 virge->wake_main_thread = thread_create_event();
3259 virge->not_full_event = thread_create_event();
3260 virge->render_thread = thread_create(render_thread, virge);
3262 return virge;
3263 }
3265 static void *s3_virge_375_init()
3266 {
3267 virge_t *virge = malloc(sizeof(virge_t));
3268 memset(virge, 0, sizeof(virge_t));
3270 virge->bilinear_enabled = device_get_config_int("bilinear");
3271 virge->dithering_enabled = device_get_config_int("dithering");
3272 virge->memory_size = device_get_config_int("memory");
3274 svga_init(&virge->svga, virge, virge->memory_size << 20,
3275 s3_virge_recalctimings,
3276 s3_virge_in, s3_virge_out,
3277 s3_virge_hwcursor_draw,
3278 s3_virge_overlay_draw);
3280 rom_init(&virge->bios_rom, "roms/86c375_1.bin", 0xc0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL);
3281 if (PCI)
3282 mem_mapping_disable(&virge->bios_rom.mapping);
3284 mem_mapping_add(&virge->mmio_mapping, 0, 0, s3_virge_mmio_read,
3285 s3_virge_mmio_read_w,
3286 s3_virge_mmio_read_l,
3287 s3_virge_mmio_write,
3288 s3_virge_mmio_write_w,
3289 s3_virge_mmio_write_l,
3290 NULL,
3291 0,
3292 virge);
3293 mem_mapping_add(&virge->new_mmio_mapping, 0, 0, s3_virge_mmio_read,
3294 s3_virge_mmio_read_w,
3295 s3_virge_mmio_read_l,
3296 s3_virge_mmio_write,
3297 s3_virge_mmio_write_w,
3298 s3_virge_mmio_write_l,
3299 NULL,
3300 0,
3301 virge);
3302 mem_mapping_add(&virge->linear_mapping, 0, 0, svga_read_linear,
3303 svga_readw_linear,
3304 svga_readl_linear,
3305 svga_write_linear,
3306 svga_writew_linear,
3307 svga_writel_linear,
3308 NULL,
3309 0,
3310 &virge->svga);
3312 io_sethandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3314 virge->pci_regs[4] = 3;
3315 virge->pci_regs[5] = 0;
3316 virge->pci_regs[6] = 0;
3317 virge->pci_regs[7] = 2;
3318 virge->pci_regs[0x32] = 0x0c;
3319 virge->pci_regs[0x3d] = 1;
3320 virge->pci_regs[0x3e] = 4;
3321 virge->pci_regs[0x3f] = 0xff;
3323 virge->virge_id_high = 0x8a;
3324 virge->virge_id_low = 0x01;
3325 virge->virge_rev = 0;
3326 virge->virge_id = 0xe1;
3328 switch (virge->memory_size)
3329 {
3330 case 2:
3331 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (4 << 5);
3332 break;
3333 case 4:
3334 default:
3335 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (0 << 5);
3336 break;
3337 }
3338 // virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4);
3339 virge->svga.crtc[0x37] = 1;// | (7 << 5);
3340 virge->svga.crtc[0x53] = 1 << 3;
3341 virge->svga.crtc[0x59] = 0x70;
3343 virge->svga.crtc[0x6c] = 0x01;
3345 virge->is_375 = 1;
3347 pci_add(s3_virge_pci_read, s3_virge_pci_write, virge);
3349 virge->wake_render_thread = thread_create_event();
3350 virge->wake_main_thread = thread_create_event();
3351 virge->not_full_event = thread_create_event();
3352 virge->render_thread = thread_create(render_thread, virge);
3354 return virge;
3355 }
3357 static void s3_virge_close(void *p)
3358 {
3359 virge_t *virge = (virge_t *)p;
3360 FILE *f = fopen("vram.dmp", "wb");
3361 fwrite(virge->svga.vram, 4 << 20, 1, f);
3362 fclose(f);
3364 thread_kill(virge->render_thread);
3365 thread_destroy_event(virge->not_full_event);
3366 thread_destroy_event(virge->wake_main_thread);
3367 thread_destroy_event(virge->wake_render_thread);
3369 svga_close(&virge->svga);
3371 free(virge);
3372 }
/*Availability hook for the ViRGE: forwards the result of rom_present()
  for the BIOS image "roms/s3virge.bin"*/
static int s3_virge_available()
{
        int found = rom_present("roms/s3virge.bin");

        return found;
}
/*Availability hook for the ViRGE/DX: forwards the result of rom_present()
  for the BIOS image "roms/86c375_1.bin"*/
static int s3_virge_375_available()
{
        int found = rom_present("roms/86c375_1.bin");

        return found;
}
3384 static void s3_virge_speed_changed(void *p)
3385 {
3386 virge_t *virge = (virge_t *)p;
3388 svga_recalctimings(&virge->svga);
3389 }
3391 static void s3_virge_force_redraw(void *p)
3392 {
3393 virge_t *virge = (virge_t *)p;
3395 virge->svga.fullchange = changeframecount;
3396 }
3398 static void s3_virge_add_status_info(char *s, int max_len, void *p)
3399 {
3400 virge_t *virge = (virge_t *)p;
3401 char temps[256];
3402 uint64_t new_time = timer_read();
3403 uint64_t status_diff = new_time - status_time;
3404 status_time = new_time;
3406 if (!status_diff)
3407 status_diff = 1;
3409 svga_add_status_info(s, max_len, &virge->svga);
3410 sprintf(temps, "%f Mpixels/sec\n%f ktris/sec\n%f%% CPU\n%f%% CPU (real)\n%d writes %i reads\n\n", (double)virge->pixel_count/1000000.0, (double)virge->tri_count/1000.0, ((double)virge_time * 100.0) / timer_freq, ((double)virge_time * 100.0) / status_diff, reg_writes, reg_reads);
3411 strncat(s, temps, max_len);
3413 virge->pixel_count = virge->tri_count = 0;
3414 virge_time = 0;
3415 reg_reads = 0;
3416 reg_writes = 0;
3417 }
3419 static device_config_t s3_virge_config[] =
3420 {
3421 {
3422 .name = "memory",
3423 .description = "Memory size",
3424 .type = CONFIG_SELECTION,
3425 .selection =
3426 {
3427 {
3428 .description = "2 MB",
3429 .value = 2
3430 },
3431 {
3432 .description = "4 MB",
3433 .value = 4
3434 },
3435 {
3436 .description = ""
3437 }
3438 },
3439 .default_int = 4
3440 },
3441 {
3442 .name = "bilinear",
3443 .description = "Bilinear filtering",
3444 .type = CONFIG_BINARY,
3445 .default_int = 1
3446 },
3447 {
3448 .name = "dithering",
3449 .description = "Dithering",
3450 .type = CONFIG_BINARY,
3451 .default_int = 1
3452 },
3453 {
3454 .type = -1
3455 }
3456 };
3458 device_t s3_virge_device =
3459 {
3460 "Diamond Stealth 3D 2000 (S3 ViRGE)",
3461 0,
3462 s3_virge_init,
3463 s3_virge_close,
3464 s3_virge_available,
3465 s3_virge_speed_changed,
3466 s3_virge_force_redraw,
3467 s3_virge_add_status_info,
3468 s3_virge_config
3469 };
3471 device_t s3_virge_375_device =
3472 {
3473 "S3 ViRGE/DX",
3474 0,
3475 s3_virge_375_init,
3476 s3_virge_close,
3477 s3_virge_375_available,
3478 s3_virge_speed_changed,
3479 s3_virge_force_redraw,
3480 s3_virge_add_status_info,
3481 s3_virge_config
3482 };
