PCem
view src/vid_s3_virge.c @ 133:24b744b9a632
ViRGE S3D triangle rendering now uses a worker thread.
Fixed clipping bug on ViRGE.
Fixed status window crash.
| author | TomW |
|---|---|
| date | Tue Jul 22 21:10:39 2014 +0100 |
| parents | 9834054948fc |
| children | ad7d877a3b53 |
line source
1 /*S3 ViRGE emulation*/
2 #include <stdlib.h>
3 #include "ibm.h"
4 #include "device.h"
5 #include "io.h"
6 #include "mem.h"
7 #include "pci.h"
8 #include "rom.h"
9 #include "thread.h"
10 #include "video.h"
11 #include "vid_s3_virge.h"
12 #include "vid_svga.h"
13 #include "vid_svga_render.h"
/*File-wide counters.
  reg_writes / reg_reads are incremented once per MMIO write / read handler
  invocation further down this file. The two time accumulators are not
  referenced in this part of the file (presumably profiling totals - confirm
  at their use sites).*/
static uint64_t virge_time = 0;
static uint64_t status_time = 0;
static int reg_writes = 0;
static int reg_reads = 0;
/*4x4 table of dither values, each in the range 0..7.
  The code that indexes it lies outside this part of the file.*/
static int dither[4][4] =
{
        {0, 4, 1, 5},
        {6, 2, 7, 3},
        {1, 5, 0, 4},
        {7, 3, 6, 2},
};
/*Geometry of the queued-triangle ring buffer (virge_t::s3d_buffer).
  The occupancy macros expect a variable named 'virge' in scope. Because
  RB_FULL compares the index difference against RB_SIZE itself, the read and
  write indices must be free-running counters rather than values already
  wrapped to RB_MASK.*/
#define RB_SIZE    256
#define RB_MASK    (RB_SIZE - 1)

#define RB_ENTRIES (virge->s3d_write_idx - virge->s3d_read_idx)
#define RB_FULL    (RB_ENTRIES == RB_SIZE)
#define RB_EMPTY   (RB_ENTRIES == 0)
/*Register state describing one S3D triangle.
  Fields are filled from the 0xb4d4.. / 0xb500.. MMIO registers by the
  long-write handler; the register noted beside a field is the one that
  handler stores into it. Fields without a note are written outside the
  part of the file reviewed here.*/
typedef struct s3d_t
{
        uint32_t cmd_set;       /*0xb500*/
        int clip_l;             /*0xb4dc, bits 16-26*/
        int clip_r;             /*0xb4dc, bits 0-10*/
        int clip_t;             /*0xb4e0, bits 16-26*/
        int clip_b;             /*0xb4e0, bits 0-10*/

        uint32_t dest_base;     /*0xb4d8*/
        uint32_t dest_str;      /*0xb4e4, bits 16-27*/

        uint32_t z_base;        /*0xb4d4*/
        uint32_t z_str;         /*0xb4e8*/

        uint32_t tex_base;      /*0xb4ec*/
        uint32_t tex_bdr_clr;   /*0xb4f0*/
        uint32_t tbv;           /*0xb504*/
        uint32_t tbu;           /*0xb508*/
        int32_t TdVdX;          /*0xb51c*/
        int32_t TdUdX;          /*0xb520*/
        int32_t TdVdY;          /*0xb528*/
        int32_t TdUdY;          /*0xb52c*/
        uint32_t tus;           /*0xb538*/
        uint32_t tvs;           /*0xb534*/

        int32_t TdZdX;
        int32_t TdZdY;
        uint32_t tzs;

        int32_t TdWdX;          /*0xb50c*/
        int32_t TdWdY;          /*0xb510*/
        uint32_t tws;           /*0xb514*/

        int32_t TdDdX;          /*0xb518*/
        int32_t TdDdY;          /*0xb524*/
        uint32_t tds;           /*0xb530*/

        int16_t TdGdX;          /*0xb53c, high word*/
        int16_t TdBdX;          /*0xb53c, low word*/
        int16_t TdRdX;
        int16_t TdAdX;
        int16_t TdGdY;
        int16_t TdBdY;
        int16_t TdRdY;
        int16_t TdAdY;
        uint32_t tgs;
        uint32_t tbs;
        uint32_t trs;
        uint32_t tas;

        uint32_t TdXdY12;
        uint32_t txend12;
        uint32_t TdXdY01;
        uint32_t txend01;
        uint32_t TdXdY02;
        uint32_t txs;
        uint32_t tys;
        int ty01;
        int ty12;
        int tlr;
} s3d_t;
75 typedef struct virge_t
76 {
77 mem_mapping_t linear_mapping;
78 mem_mapping_t mmio_mapping;
79 mem_mapping_t new_mmio_mapping;
81 rom_t bios_rom;
83 svga_t svga;
85 uint8_t bank;
86 uint8_t ma_ext;
87 int width;
88 int bpp;
90 uint8_t virge_id, virge_id_high, virge_id_low, virge_rev;
92 uint32_t linear_base, linear_size;
94 uint8_t pci_regs[256];
96 int is_375;
98 int bilinear_enabled;
99 int dithering_enabled;
100 int memory_size;
102 int pixel_count, tri_count;
104 thread_t *render_thread;
105 event_t *wake_render_thread;
106 event_t *wake_main_thread;
107 event_t *not_full_event;
109 struct
110 {
111 uint32_t src_base;
112 uint32_t dest_base;
113 int clip_l, clip_r, clip_t, clip_b;
114 int dest_str, src_str;
115 uint32_t mono_pat_0;
116 uint32_t mono_pat_1;
117 uint32_t pat_bg_clr;
118 uint32_t pat_fg_clr;
119 uint32_t src_bg_clr;
120 uint32_t src_fg_clr;
121 uint32_t cmd_set;
122 int r_width, r_height;
123 int rsrc_x, rsrc_y;
124 int rdest_x, rdest_y;
126 int lxend0, lxend1;
127 int32_t ldx;
128 uint32_t lxstart, lystart;
129 int lycnt;
130 int line_dir;
132 int src_x, src_y;
133 int dest_x, dest_y;
134 int w, h;
135 uint8_t rop;
137 int data_left_count;
138 uint32_t data_left;
140 uint32_t pattern_8[8*8];
141 uint32_t pattern_16[8*8];
142 uint32_t pattern_32[8*8];
143 } s3d;
145 s3d_t s3d_tri;
147 s3d_t s3d_buffer[RB_SIZE];
148 int s3d_read_idx, s3d_write_idx;
149 int s3d_busy;
151 struct
152 {
153 uint32_t pri_ctrl;
154 uint32_t chroma_ctrl;
155 uint32_t sec_ctrl;
156 uint32_t chroma_upper_bound;
157 uint32_t sec_filter;
158 uint32_t blend_ctrl;
159 uint32_t pri_fb0, pri_fb1;
160 uint32_t pri_stride;
161 uint32_t buffer_ctrl;
162 uint32_t sec_fb0, sec_fb1;
163 uint32_t sec_stride;
164 uint32_t overlay_ctrl;
165 int32_t k1_vert_scale;
166 int32_t k2_vert_scale;
167 int32_t dda_vert_accumulator;
168 int32_t k1_horiz_scale;
169 int32_t k2_horiz_scale;
170 int32_t dda_horiz_accumulator;
171 uint32_t fifo_ctrl;
172 uint32_t pri_start;
173 uint32_t pri_size;
174 uint32_t sec_start;
175 uint32_t sec_size;
177 int sdif;
179 int pri_x, pri_y, pri_w, pri_h;
180 int sec_x, sec_y, sec_w, sec_h;
181 } streams;
182 } virge_t;
184 static void queue_triangle(virge_t *virge);
186 static void s3_virge_recalctimings(svga_t *svga);
187 static void s3_virge_updatemapping(virge_t *virge);
189 static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat);
191 static uint8_t s3_virge_mmio_read(uint32_t addr, void *p);
192 static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *p);
193 static uint32_t s3_virge_mmio_read_l(uint32_t addr, void *p);
194 static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *p);
195 static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *p);
196 static void s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *p);
/*Bit fields of the command-set registers (0xa500 for the 2D engine, 0xb500
  for S3D triangles). Values are written out as full-width hex so overlapping
  fields are easy to spot.*/
enum
{
        /*When clear, writing the command register starts the operation at
          once; when set, the operation starts on a later register write.*/
        CMD_SET_AE           = 0x00000001,
        CMD_SET_HC           = 0x00000002,

        /*Bits 2-4*/
        CMD_SET_FORMAT_MASK  = 0x0000001c,
        CMD_SET_FORMAT_8     = 0x00000000,
        CMD_SET_FORMAT_16    = 0x00000004,
        CMD_SET_FORMAT_24    = 0x00000008,

        /*CMD_SET_MS: when set, the word and long MMIO write handlers
          byte-swap CPU data before passing it to the 2D engine.*/
        CMD_SET_MS           = 0x00000040,
        CMD_SET_IDS          = 0x00000080,
        CMD_SET_MP           = 0x00000100,
        CMD_SET_TP           = 0x00000200,

        /*Bits 10-11*/
        CMD_SET_ITA_MASK     = 0x00000c00,
        CMD_SET_ITA_BYTE     = 0x00000000,
        CMD_SET_ITA_WORD     = 0x00000400,
        CMD_SET_ITA_DWORD    = 0x00000800,

        CMD_SET_ZUP          = 0x00800000,

        /*Bits 24-25; note this shares bit 25 with CMD_SET_XP*/
        CMD_SET_ZB_MODE      = 0x03000000,

        CMD_SET_XP           = 0x02000000,
        CMD_SET_YP           = 0x04000000,

        /*Bits 27-30*/
        CMD_SET_COMMAND_MASK = 0x78000000
};

#define CMD_SET_ABC_SRC    0x00040000
#define CMD_SET_ABC_ENABLE 0x00080000
#define CMD_SET_TWE        0x04000000

/*Values of the CMD_SET_COMMAND_MASK field*/
enum
{
        CMD_SET_COMMAND_BITBLT   = 0x00000000,
        CMD_SET_COMMAND_RECTFILL = 0x10000000,
        CMD_SET_COMMAND_LINE     = 0x18000000,
        CMD_SET_COMMAND_NOP      = 0x78000000
};
240 static void s3_virge_out(uint16_t addr, uint8_t val, void *p)
241 {
242 virge_t *virge = (virge_t *)p;
243 svga_t *svga = &virge->svga;
244 uint8_t old;
246 if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1))
247 addr ^= 0x60;
249 // pclog("S3 out %04X %02X %04X:%08X %04X %04X %i\n", addr, val, CS, pc, ES, BX, ins);
251 switch (addr)
252 {
253 case 0x3c5:
254 if (svga->seqaddr >= 0x10)
255 {
256 svga->seqregs[svga->seqaddr & 0x1f]=val;
257 s3_virge_recalctimings(svga);
258 return;
259 }
260 if (svga->seqaddr == 4) /*Chain-4 - update banking*/
261 {
262 if (val & 8) svga->write_bank = svga->read_bank = virge->bank << 16;
263 else svga->write_bank = svga->read_bank = virge->bank << 14;
264 }
265 break;
267 //case 0x3C6: case 0x3C7: case 0x3C8: case 0x3C9:
268 // pclog("Write RAMDAC %04X %02X %04X:%04X\n", addr, val, CS, pc);
269 //sdac_ramdac_out(addr,val);
270 //return;
272 case 0x3d4:
273 svga->crtcreg = val;// & 0x7f;
274 return;
275 case 0x3d5:
276 //pclog("Write CRTC R%02X %02X %04x(%08x):%08x\n", svga->crtcreg, val, CS, cs, pc);
277 if (svga->crtcreg <= 7 && svga->crtc[0x11] & 0x80)
278 return;
279 if (svga->crtcreg >= 0x20 && svga->crtcreg != 0x38 && (svga->crtc[0x38] & 0xcc) != 0x48)
280 return;
281 if (svga->crtcreg >= 0x80)
282 return;
283 old = svga->crtc[svga->crtcreg];
284 svga->crtc[svga->crtcreg] = val;
285 switch (svga->crtcreg)
286 {
287 case 0x31:
288 virge->ma_ext = (virge->ma_ext & 0x1c) | ((val & 0x30) >> 4);
289 svga->vrammask = (val & 8) ? 0x3fffff : 0x3ffff;
290 break;
292 case 0x50:
293 switch (svga->crtc[0x50] & 0xc1)
294 {
295 case 0x00: virge->width = (svga->crtc[0x31] & 2) ? 2048 : 1024; break;
296 case 0x01: virge->width = 1152; break;
297 case 0x40: virge->width = 640; break;
298 case 0x80: virge->width = 800; break;
299 case 0x81: virge->width = 1600; break;
300 case 0xc0: virge->width = 1280; break;
301 }
302 virge->bpp = (svga->crtc[0x50] >> 4) & 3;
303 break;
304 case 0x69:
305 virge->ma_ext = val & 0x1f;
306 break;
308 case 0x35:
309 virge->bank = (virge->bank & 0x70) | (val & 0xf);
310 // pclog("CRTC write R35 %02X\n", val);
311 if (svga->chain4) svga->write_bank = svga->read_bank = virge->bank << 16;
312 else svga->write_bank = svga->read_bank = virge->bank << 14;
313 break;
314 case 0x51:
315 virge->bank = (virge->bank & 0x4f) | ((val & 0xc) << 2);
316 if (svga->chain4) svga->write_bank = svga->read_bank = virge->bank << 16;
317 else svga->write_bank = svga->read_bank = virge->bank << 14;
318 virge->ma_ext = (virge->ma_ext & ~0xc) | ((val & 3) << 2);
319 break;
320 case 0x6a:
321 virge->bank = val;
322 // pclog("CRTC write R6a %02X\n", val);
323 if (svga->chain4) svga->write_bank = svga->read_bank = virge->bank << 16;
324 else svga->write_bank = svga->read_bank = virge->bank << 14;
325 break;
327 case 0x3a:
328 if (val & 0x10) svga->gdcreg[5] |= 0x40; /*Horrible cheat*/
329 break;
331 case 0x45:
332 svga->hwcursor.ena = val & 1;
333 break;
334 case 0x46: case 0x47: case 0x48: case 0x49:
335 case 0x4c: case 0x4d: case 0x4e: case 0x4f:
336 svga->hwcursor.x = ((svga->crtc[0x46] << 8) | svga->crtc[0x47]) & 0x7ff;
337 svga->hwcursor.y = ((svga->crtc[0x48] << 8) | svga->crtc[0x49]) & 0x7ff;
338 svga->hwcursor.xoff = svga->crtc[0x4e] & 63;
339 svga->hwcursor.yoff = svga->crtc[0x4f] & 63;
340 svga->hwcursor.addr = ((((svga->crtc[0x4c] << 8) | svga->crtc[0x4d]) & 0xfff) * 1024) + (svga->hwcursor.yoff * 16);
341 break;
343 case 0x53:
344 case 0x58: case 0x59: case 0x5a:
345 s3_virge_updatemapping(virge);
346 break;
348 case 0x67:
349 switch (val >> 4)
350 {
351 case 3: svga->bpp = 15; break;
352 case 5: svga->bpp = 16; break;
353 case 7: svga->bpp = 24; break;
354 case 13: svga->bpp = 32; break;
355 default: svga->bpp = 8; break;
356 }
357 break;
358 //case 0x55: case 0x43:
359 // pclog("Write CRTC R%02X %02X\n", crtcreg, val);
360 }
361 if (old != val)
362 {
363 if (svga->crtcreg < 0xe || svga->crtcreg > 0x10)
364 {
365 svga->fullchange = changeframecount;
366 svga_recalctimings(svga);
367 }
368 }
369 break;
370 }
371 svga_out(addr, val, svga);
372 }
374 static uint8_t s3_virge_in(uint16_t addr, void *p)
375 {
376 virge_t *virge = (virge_t *)p;
377 svga_t *svga = &virge->svga;
378 uint8_t ret;
380 if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1))
381 addr ^= 0x60;
383 // if (addr != 0x3da) pclog("S3 in %04X %04X:%08X ", addr, CS, pc);
384 switch (addr)
385 {
386 //case 0x3C6: case 0x3C7: case 0x3C8: case 0x3C9:
387 // pclog("Read RAMDAC %04X %04X:%04X\n", addr, CS, pc);
388 //return sdac_ramdac_in(addr);
390 case 0x3c5:
391 if (svga->seqaddr >= 0x10)
392 ret = svga->seqregs[svga->seqaddr & 0x1f];
393 else
394 ret = svga_in(addr, svga);
395 break;
397 case 0x3D4:
398 ret = svga->crtcreg;
399 break;
400 case 0x3D5:
401 //pclog("Read CRTC R%02X %04X:%04X (%02x)\n", svga->crtcreg, CS, pc, svga->crtc[svga->crtcreg]);
402 switch (svga->crtcreg)
403 {
404 case 0x2d: ret = virge->virge_id_high; break; /*Extended chip ID*/
405 case 0x2e: ret = virge->virge_id_low; break; /*New chip ID*/
406 case 0x2f: ret = virge->virge_rev; break;
407 case 0x30: ret = virge->virge_id; break; /*Chip ID*/
408 case 0x31: ret = (svga->crtc[0x31] & 0xcf) | ((virge->ma_ext & 3) << 4); break;
409 case 0x35: ret = (svga->crtc[0x35] & 0xf0) | (virge->bank & 0xf); break;
410 case 0x36: ret = (svga->crtc[0x36] & 0xfc) | 2; break; /*PCI bus*/
411 case 0x51: ret = (svga->crtc[0x51] & 0xf0) | ((virge->bank >> 2) & 0xc) | ((virge->ma_ext >> 2) & 3); break;
412 case 0x69: ret = virge->ma_ext; break;
413 case 0x6a: ret = virge->bank; break;
414 default: ret = svga->crtc[svga->crtcreg]; break;
415 }
416 break;
418 default:
419 ret = svga_in(addr, svga);
420 break;
421 }
422 // if (addr != 0x3da) pclog("%02X\n", ret);
423 return ret;
424 }
426 static void s3_virge_recalctimings(svga_t *svga)
427 {
428 virge_t *virge = (virge_t *)svga->p;
430 if (svga->crtc[0x5d] & 0x01) svga->htotal += 0x100;
431 if (svga->crtc[0x5d] & 0x02) svga->hdisp += 0x100;
432 if (svga->crtc[0x5e] & 0x01) svga->vtotal += 0x400;
433 if (svga->crtc[0x5e] & 0x02) svga->dispend += 0x400;
434 if (svga->crtc[0x5e] & 0x04) svga->vblankstart += 0x400;
435 if (svga->crtc[0x5e] & 0x10) svga->vsyncstart += 0x400;
436 if (svga->crtc[0x5e] & 0x40) svga->split += 0x400;
437 svga->interlace = svga->crtc[0x42] & 0x20;
439 if ((svga->crtc[0x67] & 0xc) != 0xc) /*VGA mode*/
440 {
441 svga->ma_latch |= (virge->ma_ext << 16);
442 //pclog("VGA mode\n");
443 if (svga->crtc[0x51] & 0x30) svga->rowoffset += (svga->crtc[0x51] & 0x30) << 4;
444 else if (svga->crtc[0x43] & 0x04) svga->rowoffset += 0x100;
445 if (!svga->rowoffset) svga->rowoffset = 256;
447 if ((svga->gdcreg[5] & 0x40) && (svga->crtc[0x3a] & 0x10))
448 {
449 switch (svga->bpp)
450 {
451 case 8:
452 svga->render = svga_render_8bpp_highres;
453 break;
454 case 15:
455 svga->render = svga_render_15bpp_highres;
456 break;
457 case 16:
458 svga->render = svga_render_16bpp_highres;
459 break;
460 case 24:
461 svga->render = svga_render_24bpp_highres;
462 break;
463 case 32:
464 svga->render = svga_render_32bpp_highres;
465 break;
466 }
467 }
469 // pclog("svga->rowoffset = %i bpp=%i\n", svga->rowoffset, svga->bpp);
470 if (svga->bpp == 15 || svga->bpp == 16)
471 {
472 svga->htotal >>= 1;
473 svga->hdisp >>= 1;
474 }
475 if (svga->bpp == 24)
476 {
477 svga->rowoffset = (svga->rowoffset * 3) / 4; /*Hack*/
478 }
479 //pclog("VGA mode x_disp=%i dispend=%i vtotal=%i\n", svga->hdisp, svga->dispend, svga->vtotal);
480 }
481 else /*Streams mode*/
482 {
483 if (virge->streams.buffer_ctrl & 1)
484 svga->ma_latch = virge->streams.pri_fb1 >> 2;
485 else
486 svga->ma_latch = virge->streams.pri_fb0 >> 2;
488 svga->hdisp = virge->streams.pri_w + 1;
489 svga->dispend = virge->streams.pri_h;
491 svga->overlay.x = virge->streams.sec_x - virge->streams.pri_x;
492 svga->overlay.y = virge->streams.sec_y - virge->streams.pri_y;
493 svga->overlay.ysize = virge->streams.sec_h;
495 if (virge->streams.buffer_ctrl & 2)
496 svga->overlay.addr = virge->streams.sec_fb1;
497 else
498 svga->overlay.addr = virge->streams.sec_fb0;
500 svga->overlay.ena = (svga->overlay.x >= 0);
501 svga->overlay.v_acc = virge->streams.dda_vert_accumulator;
502 //pclog("Streams mode x_disp=%i dispend=%i vtotal=%i x=%i y=%i ysize=%i\n", svga->hdisp, svga->dispend, svga->vtotal, svga->overlay.x, svga->overlay.y, svga->overlay.ysize);
503 svga->rowoffset = virge->streams.pri_stride >> 3;
505 switch ((virge->streams.pri_ctrl >> 24) & 0x7)
506 {
507 case 0: /*RGB-8 (CLUT)*/
508 svga->render = svga_render_8bpp_highres;
509 break;
510 case 3: /*KRGB-16 (1.5.5.5)*/
511 svga->htotal >>= 1;
512 svga->render = svga_render_15bpp_highres;
513 break;
514 case 5: /*RGB-16 (5.6.5)*/
515 svga->htotal >>= 1;
516 svga->render = svga_render_16bpp_highres;
517 break;
518 case 6: /*RGB-24 (8.8.8)*/
519 svga->render = svga_render_24bpp_highres;
520 break;
521 case 7: /*XRGB-32 (X.8.8.8)*/
522 svga->render = svga_render_32bpp_highres;
523 break;
524 }
525 }
527 if (((svga->miscout >> 2) & 3) == 3)
528 {
529 int n = svga->seqregs[0x12] & 0x1f;
530 int r = (svga->seqregs[0x12] >> 5) & 3;
531 int m = svga->seqregs[0x13] & 0x7f;
532 double freq = (((double)m + 2) / (((double)n + 2) * (double)(1 << r))) * 14318184.0;
534 svga->clock = cpuclock / freq;
535 }
536 }
538 static void s3_virge_updatemapping(virge_t *virge)
539 {
540 svga_t *svga = &virge->svga;
542 if (!(virge->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_MEM))
543 {
544 // pclog("Update mapping - PCI disabled\n");
545 mem_mapping_disable(&svga->mapping);
546 mem_mapping_disable(&virge->linear_mapping);
547 mem_mapping_disable(&virge->mmio_mapping);
548 mem_mapping_disable(&virge->new_mmio_mapping);
549 return;
550 }
552 pclog("Update mapping - bank %02X ", svga->gdcreg[6] & 0xc);
553 switch (svga->gdcreg[6] & 0xc) /*Banked framebuffer*/
554 {
555 case 0x0: /*128k at A0000*/
556 mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x20000);
557 svga->banked_mask = 0xffff;
558 break;
559 case 0x4: /*64k at A0000*/
560 mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x10000);
561 svga->banked_mask = 0xffff;
562 break;
563 case 0x8: /*32k at B0000*/
564 mem_mapping_set_addr(&svga->mapping, 0xb0000, 0x08000);
565 svga->banked_mask = 0x7fff;
566 break;
567 case 0xC: /*32k at B8000*/
568 mem_mapping_set_addr(&svga->mapping, 0xb8000, 0x08000);
569 svga->banked_mask = 0x7fff;
570 break;
571 }
573 virge->linear_base = (svga->crtc[0x5a] << 16) | (svga->crtc[0x59] << 24);
575 pclog("Linear framebuffer %02X ", svga->crtc[0x58] & 0x10);
576 if (svga->crtc[0x58] & 0x10) /*Linear framebuffer*/
577 {
578 switch (svga->crtc[0x58] & 3)
579 {
580 case 0: /*64k*/
581 virge->linear_size = 0x10000;
582 break;
583 case 1: /*1mb*/
584 virge->linear_size = 0x100000;
585 break;
586 case 2: /*2mb*/
587 virge->linear_size = 0x200000;
588 break;
589 case 3: /*8mb*/
590 virge->linear_size = 0x400000;
591 break;
592 }
593 virge->linear_base &= ~(virge->linear_size - 1);
594 // pclog("%08X %08X %02X %02X %02X\n", linear_base, linear_size, crtc[0x58], crtc[0x59], crtc[0x5a]);
595 pclog("Linear framebuffer at %08X size %08X\n", virge->linear_base, virge->linear_size);
596 if (virge->linear_base == 0xa0000)
597 {
598 mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x10000);
599 mem_mapping_disable(&virge->linear_mapping);
600 }
601 else
602 mem_mapping_set_addr(&virge->linear_mapping, virge->linear_base, virge->linear_size);
603 }
604 else
605 mem_mapping_disable(&virge->linear_mapping);
607 pclog("Memory mapped IO %02X\n", svga->crtc[0x53] & 0x18);
608 if (svga->crtc[0x53] & 0x10) /*Old MMIO*/
609 {
610 if (svga->crtc[0x53] & 0x20)
611 mem_mapping_set_addr(&virge->mmio_mapping, 0xb8000, 0x8000);
612 else
613 mem_mapping_set_addr(&virge->mmio_mapping, 0xa0000, 0x10000);
614 }
615 else
616 mem_mapping_disable(&virge->mmio_mapping);
618 if (svga->crtc[0x53] & 0x08) /*New MMIO*/
619 mem_mapping_set_addr(&virge->new_mmio_mapping, virge->linear_base + 0x1000000, 0x10000);
620 else
621 mem_mapping_disable(&virge->new_mmio_mapping);
623 }
626 static uint8_t s3_virge_mmio_read(uint32_t addr, void *p)
627 {
628 reg_reads++;
629 // pclog("New MMIO readb %08X\n", addr);
630 switch (addr & 0xffff)
631 {
632 case 0x83b0: case 0x83b1: case 0x83b2: case 0x83b3:
633 case 0x83b4: case 0x83b5: case 0x83b6: case 0x83b7:
634 case 0x83b8: case 0x83b9: case 0x83ba: case 0x83bb:
635 case 0x83bc: case 0x83bd: case 0x83be: case 0x83bf:
636 case 0x83c0: case 0x83c1: case 0x83c2: case 0x83c3:
637 case 0x83c4: case 0x83c5: case 0x83c6: case 0x83c7:
638 case 0x83c8: case 0x83c9: case 0x83ca: case 0x83cb:
639 case 0x83cc: case 0x83cd: case 0x83ce: case 0x83cf:
640 case 0x83d0: case 0x83d1: case 0x83d2: case 0x83d3:
641 case 0x83d4: case 0x83d5: case 0x83d6: case 0x83d7:
642 case 0x83d8: case 0x83d9: case 0x83da: case 0x83db:
643 case 0x83dc: case 0x83dd: case 0x83de: case 0x83df:
644 return s3_virge_in(addr & 0x3ff, p);
645 }
646 return 0xff;
647 }
648 static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *p)
649 {
650 reg_reads++;
651 // pclog("New MMIO readw %08X\n", addr);
652 switch (addr & 0xfffe)
653 {
654 default:
655 return s3_virge_mmio_read(addr, p) | (s3_virge_mmio_read(addr + 1, p) << 8);
656 }
657 return 0xffff;
658 }
659 static uint32_t s3_virge_mmio_read_l(uint32_t addr, void *p)
660 {
661 virge_t *virge = (virge_t *)p;
662 uint32_t ret = 0xffffffff;
663 reg_reads++;
664 // pclog("New MMIO readl %08X %04X(%08X):%08X ", addr, CS, cs, pc);
665 switch (addr & 0xfffc)
666 {
667 case 0x8180:
668 ret = virge->streams.pri_ctrl;
669 break;
670 case 0x8184:
671 ret = virge->streams.chroma_ctrl;
672 break;
673 case 0x8190:
674 ret = virge->streams.sec_ctrl;
675 break;
676 case 0x8194:
677 ret = virge->streams.chroma_upper_bound;
678 break;
679 case 0x8198:
680 ret = virge->streams.sec_filter;
681 break;
682 case 0x81a0:
683 ret = virge->streams.blend_ctrl;
684 break;
685 case 0x81c0:
686 ret = virge->streams.pri_fb0;
687 break;
688 case 0x81c4:
689 ret = virge->streams.pri_fb1;
690 break;
691 case 0x81c8:
692 ret = virge->streams.pri_stride;
693 break;
694 case 0x81cc:
695 ret = virge->streams.buffer_ctrl;
696 break;
697 case 0x81d0:
698 ret = virge->streams.sec_fb0;
699 break;
700 case 0x81d4:
701 ret = virge->streams.sec_fb1;
702 break;
703 case 0x81d8:
704 ret = virge->streams.sec_stride;
705 break;
706 case 0x81dc:
707 ret = virge->streams.overlay_ctrl;
708 break;
709 case 0x81e0:
710 ret = virge->streams.k1_vert_scale;
711 break;
712 case 0x81e4:
713 ret = virge->streams.k2_vert_scale;
714 break;
715 case 0x81e8:
716 ret = virge->streams.dda_vert_accumulator;
717 break;
718 case 0x81ec:
719 ret = virge->streams.fifo_ctrl;
720 break;
721 case 0x81f0:
722 ret = virge->streams.pri_start;
723 break;
724 case 0x81f4:
725 ret = virge->streams.pri_size;
726 break;
727 case 0x81f8:
728 ret = virge->streams.sec_start;
729 break;
730 case 0x81fc:
731 ret = virge->streams.sec_size;
732 break;
734 case 0x8504:
735 if (virge->s3d_busy)
736 ret = (0x10 << 8);
737 else
738 ret = (0x10 << 8) | (1 << 13);
739 // pclog("Read status %04x %i\n", ret, virge->s3d_busy);
740 break;
741 case 0xa4d4:
742 ret = virge->s3d.src_base;
743 break;
744 case 0xa4d8:
745 ret = virge->s3d.dest_base;
746 break;
747 case 0xa4dc:
748 ret = (virge->s3d.clip_l << 16) | virge->s3d.clip_r;
749 break;
750 case 0xa4e0:
751 ret = (virge->s3d.clip_t << 16) | virge->s3d.clip_b;
752 break;
753 case 0xa4e4:
754 ret = (virge->s3d.dest_str << 16) | virge->s3d.src_str;
755 break;
756 case 0xa4e8:
757 ret = virge->s3d.mono_pat_0;
758 break;
759 case 0xa4ec:
760 ret = virge->s3d.mono_pat_1;
761 break;
762 case 0xa4f0:
763 ret = virge->s3d.pat_bg_clr;
764 break;
765 case 0xa4f4:
766 ret = virge->s3d.pat_fg_clr;
767 break;
768 case 0xa4f8:
769 ret = virge->s3d.src_bg_clr;
770 break;
771 case 0xa4fc:
772 ret = virge->s3d.src_fg_clr;
773 break;
774 case 0xa500:
775 ret = virge->s3d.cmd_set;
776 break;
777 case 0xa504:
778 ret = (virge->s3d.r_width << 16) | virge->s3d.r_height;
779 break;
780 case 0xa508:
781 ret = (virge->s3d.rsrc_x << 16) | virge->s3d.rsrc_y;
782 break;
783 case 0xa50c:
784 ret = (virge->s3d.rdest_x << 16) | virge->s3d.rdest_y;
785 break;
787 default:
788 ret = s3_virge_mmio_read_w(addr, p) | (s3_virge_mmio_read_w(addr + 2, p) << 16);
789 }
790 // /*if ((addr & 0xfffc) != 0x8504) */pclog("%02x\n", ret);
791 return ret;
792 }
793 static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *p)
794 {
795 virge_t *virge = (virge_t *)p;
796 svga_t *svga = &virge->svga;
798 // pclog("New MMIO writeb %08X %02X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
799 reg_writes++;
800 if ((addr & 0xfffc) < 0x8000)
801 s3_virge_bitblt(virge, 8, val);
802 else switch (addr & 0xffff)
803 {
804 case 0x83b0: case 0x83b1: case 0x83b2: case 0x83b3:
805 case 0x83b4: case 0x83b5: case 0x83b6: case 0x83b7:
806 case 0x83b8: case 0x83b9: case 0x83ba: case 0x83bb:
807 case 0x83bc: case 0x83bd: case 0x83be: case 0x83bf:
808 case 0x83c0: case 0x83c1: case 0x83c2: case 0x83c3:
809 case 0x83c4: case 0x83c5: case 0x83c6: case 0x83c7:
810 case 0x83c8: case 0x83c9: case 0x83ca: case 0x83cb:
811 case 0x83cc: case 0x83cd: case 0x83ce: case 0x83cf:
812 case 0x83d0: case 0x83d1: case 0x83d2: case 0x83d3:
813 case 0x83d4: case 0x83d5: case 0x83d6: case 0x83d7:
814 case 0x83d8: case 0x83d9: case 0x83da: case 0x83db:
815 case 0x83dc: case 0x83dd: case 0x83de: case 0x83df:
816 s3_virge_out(addr & 0x3ff, val, p);
817 break;
818 }
821 }
822 static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *p)
823 {
824 virge_t *virge = (virge_t *)p;
825 reg_writes++;
826 // pclog("New MMIO writew %08X %04X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
827 if ((addr & 0xfffc) < 0x8000)
828 {
829 if (virge->s3d.cmd_set & CMD_SET_MS)
830 s3_virge_bitblt(virge, 16, ((val >> 8) | (val << 8)) << 16);
831 else
832 s3_virge_bitblt(virge, 16, val);
833 }
834 else switch (addr & 0xfffe)
835 {
836 case 0x83d4:
837 s3_virge_mmio_write(addr, val, p);
838 s3_virge_mmio_write(addr + 1, val >> 8, p);
839 break;
840 }
841 }
842 static void s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *p)
843 {
844 virge_t *virge = (virge_t *)p;
845 svga_t *svga = &virge->svga;
846 reg_writes++;
847 // if ((addr & 0xfffc) >= 0xb400 && (addr & 0xfffc) < 0xb800)
848 // pclog("New MMIO writel %08X %08X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
850 if ((addr & 0xfffc) < 0x8000)
851 {
852 if (virge->s3d.cmd_set & CMD_SET_MS)
853 s3_virge_bitblt(virge, 32, ((val & 0xff000000) >> 24) | ((val & 0x00ff0000) >> 8) | ((val & 0x0000ff00) << 8) | ((val & 0x000000ff) << 24));
854 else
855 s3_virge_bitblt(virge, 32, val);
856 }
857 else switch (addr & 0xfffc)
858 {
859 case 0x8180:
860 virge->streams.pri_ctrl = val;
861 s3_virge_recalctimings(svga);
862 svga->fullchange = changeframecount;
863 break;
864 case 0x8184:
865 virge->streams.chroma_ctrl = val;
866 break;
867 case 0x8190:
868 virge->streams.sec_ctrl = val;
869 virge->streams.dda_horiz_accumulator = val & 0xfff;
870 if (val & (1 << 11))
871 virge->streams.dda_horiz_accumulator |= 0xfffff800;
872 virge->streams.sdif = (val >> 24) & 7;
873 break;
874 case 0x8194:
875 virge->streams.chroma_upper_bound = val;
876 break;
877 case 0x8198:
878 virge->streams.sec_filter = val;
879 virge->streams.k1_horiz_scale = val & 0x7ff;
880 if (val & (1 << 10))
881 virge->streams.k1_horiz_scale |= 0xfffff800;
882 virge->streams.k2_horiz_scale = (val >> 16) & 0x7ff;
883 if ((val >> 16) & (1 << 10))
884 virge->streams.k2_horiz_scale |= 0xfffff800;
885 break;
886 case 0x81a0:
887 virge->streams.blend_ctrl = val;
888 break;
889 case 0x81c0:
890 // pclog("Write pri_fb0 %08x\n", val);
891 virge->streams.pri_fb0 = val & 0x3fffff;
892 s3_virge_recalctimings(svga);
893 svga->fullchange = changeframecount;
894 break;
895 case 0x81c4:
896 // pclog("Write pri_fb1 %08x\n", val);
897 virge->streams.pri_fb1 = val & 0x3fffff;
898 s3_virge_recalctimings(svga);
899 svga->fullchange = changeframecount;
900 break;
901 case 0x81c8:
902 virge->streams.pri_stride = val & 0xfff;
903 s3_virge_recalctimings(svga);
904 svga->fullchange = changeframecount;
905 break;
906 case 0x81cc:
907 // pclog("Write buffer_ctrl %08x\n", val);
908 virge->streams.buffer_ctrl = val;
909 s3_virge_recalctimings(svga);
910 svga->fullchange = changeframecount;
911 break;
912 case 0x81d0:
913 virge->streams.sec_fb0 = val;
914 s3_virge_recalctimings(svga);
915 svga->fullchange = changeframecount;
916 break;
917 case 0x81d4:
918 virge->streams.sec_fb1 = val;
919 s3_virge_recalctimings(svga);
920 svga->fullchange = changeframecount;
921 break;
922 case 0x81d8:
923 virge->streams.sec_stride = val;
924 s3_virge_recalctimings(svga);
925 svga->fullchange = changeframecount;
926 break;
927 case 0x81dc:
928 virge->streams.overlay_ctrl = val;
929 break;
930 case 0x81e0:
931 virge->streams.k1_vert_scale = val & 0x7ff;
932 if (val & (1 << 10))
933 virge->streams.k1_vert_scale |= 0xfffff800;
934 break;
935 case 0x81e4:
936 virge->streams.k2_vert_scale = val & 0x7ff;
937 if (val & (1 << 10))
938 virge->streams.k2_vert_scale |= 0xfffff800;
939 break;
940 case 0x81e8:
941 virge->streams.dda_vert_accumulator = val & 0xfff;
942 if (val & (1 << 11))
943 virge->streams.dda_vert_accumulator |= 0xfffff800;
944 break;
945 case 0x81ec:
946 virge->streams.fifo_ctrl = val;
947 break;
948 case 0x81f0:
949 virge->streams.pri_start = val;
950 virge->streams.pri_x = (val >> 16) & 0x7ff;
951 virge->streams.pri_y = val & 0x7ff;
952 s3_virge_recalctimings(svga);
953 svga->fullchange = changeframecount;
954 break;
955 case 0x81f4:
956 virge->streams.pri_size = val;
957 virge->streams.pri_w = (val >> 16) & 0x7ff;
958 virge->streams.pri_h = val & 0x7ff;
959 s3_virge_recalctimings(svga);
960 svga->fullchange = changeframecount;
961 break;
962 case 0x81f8:
963 virge->streams.sec_start = val;
964 virge->streams.sec_x = (val >> 16) & 0x7ff;
965 virge->streams.sec_y = val & 0x7ff;
966 s3_virge_recalctimings(svga);
967 svga->fullchange = changeframecount;
968 break;
969 case 0x81fc:
970 virge->streams.sec_size = val;
971 virge->streams.sec_w = (val >> 16) & 0x7ff;
972 virge->streams.sec_h = val & 0x7ff;
973 s3_virge_recalctimings(svga);
974 svga->fullchange = changeframecount;
975 break;
977 case 0xa000: case 0xa004: case 0xa008: case 0xa00c:
978 case 0xa010: case 0xa014: case 0xa018: case 0xa01c:
979 case 0xa020: case 0xa024: case 0xa028: case 0xa02c:
980 case 0xa030: case 0xa034: case 0xa038: case 0xa03c:
981 case 0xa040: case 0xa044: case 0xa048: case 0xa04c:
982 case 0xa050: case 0xa054: case 0xa058: case 0xa05c:
983 case 0xa060: case 0xa064: case 0xa068: case 0xa06c:
984 case 0xa070: case 0xa074: case 0xa078: case 0xa07c:
985 case 0xa080: case 0xa084: case 0xa088: case 0xa08c:
986 case 0xa090: case 0xa094: case 0xa098: case 0xa09c:
987 case 0xa0a0: case 0xa0a4: case 0xa0a8: case 0xa0ac:
988 case 0xa0b0: case 0xa0b4: case 0xa0b8: case 0xa0bc:
989 case 0xa0c0: case 0xa0c4: case 0xa0c8: case 0xa0cc:
990 case 0xa0d0: case 0xa0d4: case 0xa0d8: case 0xa0dc:
991 case 0xa0e0: case 0xa0e4: case 0xa0e8: case 0xa0ec:
992 case 0xa0f0: case 0xa0f4: case 0xa0f8: case 0xa0fc:
993 case 0xa100: case 0xa104: case 0xa108: case 0xa10c:
994 case 0xa110: case 0xa114: case 0xa118: case 0xa11c:
995 case 0xa120: case 0xa124: case 0xa128: case 0xa12c:
996 case 0xa130: case 0xa134: case 0xa138: case 0xa13c:
997 case 0xa140: case 0xa144: case 0xa148: case 0xa14c:
998 case 0xa150: case 0xa154: case 0xa158: case 0xa15c:
999 case 0xa160: case 0xa164: case 0xa168: case 0xa16c:
1000 case 0xa170: case 0xa174: case 0xa178: case 0xa17c:
1001 case 0xa180: case 0xa184: case 0xa188: case 0xa18c:
1002 case 0xa190: case 0xa194: case 0xa198: case 0xa19c:
1003 case 0xa1a0: case 0xa1a4: case 0xa1a8: case 0xa1ac:
1004 case 0xa1b0: case 0xa1b4: case 0xa1b8: case 0xa1bc:
1005 case 0xa1c0: case 0xa1c4: case 0xa1c8: case 0xa1cc:
1006 case 0xa1d0: case 0xa1d4: case 0xa1d8: case 0xa1dc:
1007 case 0xa1e0: case 0xa1e4: case 0xa1e8: case 0xa1ec:
1008 case 0xa1f0: case 0xa1f4: case 0xa1f8: case 0xa1fc:
1009 {
1010 int x = addr & 4;
1011 int y = (addr >> 3) & 7;
1012 virge->s3d.pattern_8[y*8 + x] = val & 0xff;
1013 virge->s3d.pattern_8[y*8 + x + 1] = val >> 8;
1014 virge->s3d.pattern_8[y*8 + x + 2] = val >> 16;
1015 virge->s3d.pattern_8[y*8 + x + 3] = val >> 24;
1017 x = (addr >> 1) & 6;
1018 y = (addr >> 4) & 7;
1019 virge->s3d.pattern_16[y*8 + x] = val & 0xffff;
1020 virge->s3d.pattern_16[y*8 + x + 1] = val >> 16;
1022 x = (addr >> 2) & 7;
1023 y = (addr >> 5) & 7;
1024 virge->s3d.pattern_32[y*8 + x] = val & 0xffffff;
1025 }
1026 break;
1028 case 0xa4d4: case 0xa8d4:
1029 virge->s3d.src_base = val & 0x3ffff8;
1030 break;
1031 case 0xa4d8: case 0xa8d8:
1032 virge->s3d.dest_base = val & 0x3ffff8;
1033 break;
1034 case 0xa4dc: case 0xa8dc:
1035 virge->s3d.clip_l = (val >> 16) & 0x7ff;
1036 virge->s3d.clip_r = val & 0x7ff;
1037 break;
1038 case 0xa4e0: case 0xa8e0:
1039 virge->s3d.clip_t = (val >> 16) & 0x7ff;
1040 virge->s3d.clip_b = val & 0x7ff;
1041 break;
1042 case 0xa4e4: case 0xa8e4:
1043 virge->s3d.dest_str = (val >> 16) & 0xff8;
1044 virge->s3d.src_str = val & 0xff8;
1045 break;
1046 case 0xa4e8:
1047 virge->s3d.mono_pat_0 = val;
1048 break;
1049 case 0xa4ec:
1050 virge->s3d.mono_pat_1 = val;
1051 break;
1052 case 0xa4f0:
1053 virge->s3d.pat_bg_clr = val;
1054 break;
1055 case 0xa4f4: case 0xa8f4:
1056 virge->s3d.pat_fg_clr = val;
1057 break;
1058 case 0xa4f8:
1059 virge->s3d.src_bg_clr = val;
1060 break;
1061 case 0xa4fc:
1062 virge->s3d.src_fg_clr = val;
1063 break;
1064 case 0xa500: case 0xa900:
1065 virge->s3d.cmd_set = val;
1066 if (!(val & CMD_SET_AE))
1067 s3_virge_bitblt(virge, -1, 0);
1068 break;
1069 case 0xa504:
1070 virge->s3d.r_width = (val >> 16) & 0x7ff;
1071 virge->s3d.r_height = val & 0x7ff;
1072 break;
1073 case 0xa508:
1074 virge->s3d.rsrc_x = (val >> 16) & 0x7ff;
1075 virge->s3d.rsrc_y = val & 0x7ff;
1076 break;
1077 case 0xa50c:
1078 virge->s3d.rdest_x = (val >> 16) & 0x7ff;
1079 virge->s3d.rdest_y = val & 0x7ff;
1080 if (virge->s3d.cmd_set & CMD_SET_AE)
1081 s3_virge_bitblt(virge, -1, 0);
1082 break;
1083 case 0xa96c:
1084 virge->s3d.lxend0 = (val >> 16) & 0x7ff;
1085 virge->s3d.lxend1 = val & 0x7ff;
1086 break;
1087 case 0xa970:
1088 virge->s3d.ldx = (int32_t)val;
1089 break;
1090 case 0xa974:
1091 virge->s3d.lxstart = val;
1092 break;
1093 case 0xa978:
1094 virge->s3d.lystart = val & 0x7ff;
1095 break;
1096 case 0xa97c:
1097 virge->s3d.lycnt = val & 0x7ff;
1098 virge->s3d.line_dir = val >> 31;
1099 if (virge->s3d.cmd_set & CMD_SET_AE)
1100 s3_virge_bitblt(virge, -1, 0);
1101 break;
1103 case 0xb4d4:
1104 virge->s3d_tri.z_base = val & 0x3ffff8;
1105 break;
1106 case 0xb4d8:
1107 virge->s3d_tri.dest_base = val & 0x3ffff8;
1108 break;
1109 case 0xb4dc:
1110 virge->s3d_tri.clip_l = (val >> 16) & 0x7ff;
1111 virge->s3d_tri.clip_r = val & 0x7ff;
1112 break;
1113 case 0xb4e0:
1114 virge->s3d_tri.clip_t = (val >> 16) & 0x7ff;
1115 virge->s3d_tri.clip_b = val & 0x7ff;
1116 break;
1117 case 0xb4e4:
1118 virge->s3d_tri.dest_str = (val >> 16) & 0xff8;
1119 virge->s3d.src_str = val & 0xff8;
1120 break;
1121 case 0xb4e8:
1122 virge->s3d_tri.z_str = val & 0xff8;
1123 break;
1124 case 0xb4ec:
1125 virge->s3d_tri.tex_base = val & 0x3ffff8;
1126 break;
1127 case 0xb4f0:
1128 virge->s3d_tri.tex_bdr_clr = val & 0xffffff;
1129 break;
1130 case 0xb500:
1131 virge->s3d_tri.cmd_set = val;
1132 if (!(val & CMD_SET_AE))
1133 queue_triangle(virge);
1134 /* {
1135 thread_set_event(virge->wake_render_thread);
1136 thread_wait_event(virge->wake_main_thread, -1);
1137 } */
1138 // s3_virge_triangle(virge);
1139 break;
1140 case 0xb504:
1141 virge->s3d_tri.tbv = val & 0xfffff;
1142 break;
1143 case 0xb508:
1144 virge->s3d_tri.tbu = val & 0xfffff;
1145 break;
1146 case 0xb50c:
1147 virge->s3d_tri.TdWdX = val;
1148 break;
1149 case 0xb510:
1150 virge->s3d_tri.TdWdY = val;
1151 break;
1152 case 0xb514:
1153 virge->s3d_tri.tws = val;
1154 break;
1155 case 0xb518:
1156 virge->s3d_tri.TdDdX = val;
1157 break;
1158 case 0xb51c:
1159 virge->s3d_tri.TdVdX = val;
1160 break;
1161 case 0xb520:
1162 virge->s3d_tri.TdUdX = val;
1163 break;
1164 case 0xb524:
1165 virge->s3d_tri.TdDdY = val;
1166 break;
1167 case 0xb528:
1168 virge->s3d_tri.TdVdY = val;
1169 break;
1170 case 0xb52c:
1171 virge->s3d_tri.TdUdY = val;
1172 break;
1173 case 0xb530:
1174 virge->s3d_tri.tds = val;
1175 break;
1176 case 0xb534:
1177 virge->s3d_tri.tvs = val;
1178 break;
1179 case 0xb538:
1180 virge->s3d_tri.tus = val;
1181 break;
1182 case 0xb53c:
1183 virge->s3d_tri.TdGdX = val >> 16;
1184 virge->s3d_tri.TdBdX = val & 0xffff;
1185 break;
1186 case 0xb540:
1187 virge->s3d_tri.TdAdX = val >> 16;
1188 virge->s3d_tri.TdRdX = val & 0xffff;
1189 break;
1190 case 0xb544:
1191 virge->s3d_tri.TdGdY = val >> 16;
1192 virge->s3d_tri.TdBdY = val & 0xffff;
1193 break;
1194 case 0xb548:
1195 virge->s3d_tri.TdAdY = val >> 16;
1196 virge->s3d_tri.TdRdY = val & 0xffff;
1197 break;
1198 case 0xb54c:
1199 virge->s3d_tri.tgs = (val >> 16) & 0xffff;
1200 virge->s3d_tri.tbs = val & 0xffff;
1201 break;
1202 case 0xb550:
1203 virge->s3d_tri.tas = (val >> 16) & 0xffff;
1204 virge->s3d_tri.trs = val & 0xffff;
1205 break;
1207 case 0xb554:
1208 virge->s3d_tri.TdZdX = val;
1209 break;
1210 case 0xb558:
1211 virge->s3d_tri.TdZdY = val;
1212 break;
1213 case 0xb55c:
1214 virge->s3d_tri.tzs = val;
1215 break;
1216 case 0xb560:
1217 virge->s3d_tri.TdXdY12 = val;
1218 break;
1219 case 0xb564:
1220 virge->s3d_tri.txend12 = val;
1221 break;
1222 case 0xb568:
1223 virge->s3d_tri.TdXdY01 = val;
1224 break;
1225 case 0xb56c:
1226 virge->s3d_tri.txend01 = val;
1227 break;
1228 case 0xb570:
1229 virge->s3d_tri.TdXdY02 = val;
1230 break;
1231 case 0xb574:
1232 virge->s3d_tri.txs = val;
1233 break;
1234 case 0xb578:
1235 virge->s3d_tri.tys = val;
1236 break;
1237 case 0xb57c:
1238 virge->s3d_tri.ty01 = (val >> 16) & 0x7ff;
1239 virge->s3d_tri.ty12 = val & 0x7ff;
1240 virge->s3d_tri.tlr = val >> 31;
1241 if (virge->s3d_tri.cmd_set & CMD_SET_AE)
1242 queue_triangle(virge);
1243 /* {
1244 thread_set_event(virge->wake_render_thread);
1245 thread_wait_event(virge->wake_main_thread, -1);
1246 }*/
1248 // s3_virge_triangle(virge);
1249 break;
1250 }
1251 }
/*Fetch one pixel from video memory into `val`.
  addr - byte offset; wrapped to the 4MB window with & 0x3fffff
  val  - lvalue receiving the pixel
  Relies on two names from the expanding scope: `vram` (byte pointer) and
  `bpp` (0 = 8bpp, 1 = 16bpp, 2 = 24bpp). For any other bpp `val` is left
  untouched. The 24bpp case loads four bytes and keeps the low 24 bits.
  Both arguments are parenthesised so compound expressions are masked as a
  whole.*/
#define READ(addr, val) \
        do \
        { \
                switch (bpp) \
                { \
                        case 0: /*8 bpp*/ \
                        (val) = vram[(addr) & 0x3fffff]; \
                        break; \
                        case 1: /*16 bpp*/ \
                        (val) = *(uint16_t *)&vram[(addr) & 0x3fffff]; \
                        break; \
                        case 2: /*24 bpp*/ \
                        (val) = (*(uint32_t *)&vram[(addr) & 0x3fffff]) & 0xffffff; \
                        break; \
                } \
        } while (0)
/*16-bit Z-buffer value at byte offset `addr` (wrapped to the 4MB window).
  Uses `vram` from the expanding scope. The argument and the whole expansion
  are parenthesised so the macro is safe inside larger expressions.*/
#define Z_READ(addr) (*(uint16_t *)&vram[(addr) & 0x3fffff])
/*Store the 16-bit Z value `val` at byte offset `addr` (wrapped to the 4MB
  window), unless any CMD_SET_ZB_MODE bit is set in s3d_tri->cmd_set.
  Uses `vram` and `s3d_tri` from the expanding scope. Wrapped in
  do/while(0) so that a following `else` cannot bind to the macro's
  internal `if`.*/
#define Z_WRITE(addr, val) \
        do \
        { \
                if (!(s3d_tri->cmd_set & CMD_SET_ZB_MODE)) \
                        *(uint16_t *)&vram[(addr) & 0x3fffff] = (val); \
        } while (0)
/*2D engine clip test for the pixel (x, y).
  When CMD_SET_HC is set in virge->s3d.cmd_set and the pixel lies outside the
  inclusive rectangle clip_l..clip_r / clip_t..clip_b, the local `update`
  is cleared; otherwise `update` is left as it was.
  Uses `virge` and `update` from the expanding scope. Arguments are
  parenthesised so compound expressions compare as a whole.*/
#define CLIP(x, y) \
        do \
        { \
                if ((virge->s3d.cmd_set & CMD_SET_HC) && \
                    ((x) < virge->s3d.clip_l || \
                     (x) > virge->s3d.clip_r || \
                     (y) < virge->s3d.clip_t || \
                     (y) > virge->s3d.clip_b)) \
                        update = 0; \
        } while (0)
/*3D (triangle) clip test for the pixel (x, y).
  Same rule as the 2D clip, but reads the clip rectangle and CMD_SET_HC from
  the `s3d_tri` pointer in the expanding scope. Clears the local `update`
  when clipping is enabled and the pixel is outside the inclusive rectangle.
  Arguments are parenthesised so compound expressions compare as a whole.*/
#define CLIP_3D(x, y) \
        do \
        { \
                if ((s3d_tri->cmd_set & CMD_SET_HC) && \
                    ((x) < s3d_tri->clip_l || \
                     (x) > s3d_tri->clip_r || \
                     (y) < s3d_tri->clip_t || \
                     (y) > s3d_tri->clip_b)) \
                        update = 0; \
        } while (0)
/*Z-buffer compare for one pixel.
  Zzb - lvalue holding the value currently in the Z buffer
  Zs  - Z of the incoming pixel
  Skipped entirely when any CMD_SET_ZB_MODE bit is set in s3d_tri->cmd_set.
  Otherwise bits 20-22 of cmd_set select the compare; the pixel passes when:
    0 never, 1 Zs > Zzb, 2 Zs == Zzb, 3 Zs >= Zzb,
    4 Zs < Zzb, 5 Zs != Zzb, 6 Zs <= Zzb, 7 always.
  On a pass Zzb is overwritten with Zs; on a fail the local `update` is
  cleared. Mode 7 additionally forces `update` to 1.
  Arguments are parenthesised so compound expressions compare as a whole.*/
#define Z_CLIP(Zzb, Zs) \
        do \
        { \
                if (!(s3d_tri->cmd_set & CMD_SET_ZB_MODE)) \
                        switch ((s3d_tri->cmd_set >> 20) & 7) \
                        { \
                                case 0: update = 0; break; \
                                case 1: if ((Zs) <= (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 2: if ((Zs) != (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 3: if ((Zs) <  (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 4: if ((Zs) >= (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 5: if ((Zs) == (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 6: if ((Zs) >  (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 7: update = 1; (Zzb) = (Zs); break; \
                        } \
        } while (0)
/*Apply the 8-bit raster operation in virge->s3d.rop to the low 24 bits of
  the locals `dest`, `source` and `pattern`, ORing the result into `out`.
  For every bit position the three input bits form a 3-bit index
  (dest = bit 0, source = bit 1, pattern = bit 2); the output bit is set
  when that bit of the ROP code is set. `out` is not cleared first.*/
#define MIX() \
        do \
        { \
                int mix_bit; \
                for (mix_bit = 0; mix_bit < 24; mix_bit++) \
                { \
                        int rop_index = 0; \
                        if (dest & (1 << mix_bit)) \
                                rop_index |= 1; \
                        if (source & (1 << mix_bit)) \
                                rop_index |= 2; \
                        if (pattern & (1 << mix_bit)) \
                                rop_index |= 4; \
                        if (virge->s3d.rop & (1 << rop_index)) \
                                out |= (1 << mix_bit); \
                } \
        } while (0)
/*Store one pixel `val` to video memory at byte offset `addr` (wrapped to
  the 4MB window) and stamp the containing 4KB page in
  virge->svga.changedvram with changeframecount.
  Uses `vram`, `bpp` (0 = 8bpp, 1 = 16bpp, 2 = 24bpp), `virge` and
  `changeframecount` from the expanding scope.
  The 24bpp case performs a 32-bit store whose top byte is re-read from
  memory at addr+3. That byte is widened to uint32_t before shifting so a
  value >= 0x80 is not shifted into the sign bit of an int.
  Arguments are parenthesised so compound expressions are masked as a
  whole.*/
#define WRITE(addr, val) \
        do \
        { \
                switch (bpp) \
                { \
                        case 0: /*8 bpp*/ \
                        vram[(addr) & 0x3fffff] = (val); \
                        virge->svga.changedvram[((addr) & 0x3fffff) >> 12] = changeframecount; \
                        break; \
                        case 1: /*16 bpp*/ \
                        *(uint16_t *)&vram[(addr) & 0x3fffff] = (val); \
                        virge->svga.changedvram[((addr) & 0x3fffff) >> 12] = changeframecount; \
                        break; \
                        case 2: /*24 bpp*/ \
                        *(uint32_t *)&vram[(addr) & 0x3fffff] = ((val) & 0xffffff) | \
                                ((uint32_t)vram[((addr) + 3) & 0x3fffff] << 24); \
                        virge->svga.changedvram[((addr) & 0x3fffff) >> 12] = changeframecount; \
                        break; \
                } \
        } while (0)
1347 static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat)
1348 {
1349 int cpu_input = (count != -1);
1350 uint8_t *vram = virge->svga.vram;
1351 uint32_t mono_pattern[64];
1352 int count_mask;
1353 int x_inc = (virge->s3d.cmd_set & CMD_SET_XP) ? 1 : -1;
1354 int y_inc = (virge->s3d.cmd_set & CMD_SET_YP) ? 1 : -1;
1355 int bpp;
1356 int x_mul;
1357 int cpu_dat_shift;
1358 uint32_t *pattern_data;
1360 switch (virge->s3d.cmd_set & CMD_SET_FORMAT_MASK)
1361 {
1362 case CMD_SET_FORMAT_8:
1363 bpp = 0;
1364 x_mul = 1;
1365 cpu_dat_shift = 8;
1366 pattern_data = virge->s3d.pattern_8;
1367 break;
1368 case CMD_SET_FORMAT_16:
1369 bpp = 1;
1370 x_mul = 2;
1371 cpu_dat_shift = 16;
1372 pattern_data = virge->s3d.pattern_16;
1373 break;
1374 case CMD_SET_FORMAT_24:
1375 default:
1376 bpp = 2;
1377 x_mul = 3;
1378 cpu_dat_shift = 24;
1379 pattern_data = virge->s3d.pattern_32;
1380 break;
1381 }
1382 if (virge->s3d.cmd_set & CMD_SET_MP)
1383 pattern_data = mono_pattern;
1385 switch (virge->s3d.cmd_set & CMD_SET_ITA_MASK)
1386 {
1387 case CMD_SET_ITA_BYTE:
1388 count_mask = ~0x7;
1389 break;
1390 case CMD_SET_ITA_WORD:
1391 count_mask = ~0xf;
1392 break;
1393 case CMD_SET_ITA_DWORD:
1394 default:
1395 count_mask = ~0x1f;
1396 break;
1397 }
1398 if (virge->s3d.cmd_set & CMD_SET_MP)
1399 {
1400 int x, y;
1401 for (y = 0; y < 4; y++)
1402 {
1403 for (x = 0; x < 8; x++)
1404 {
1405 if (virge->s3d.mono_pat_0 & (1 << (x + y*8)))
1406 mono_pattern[y*8 + x] = virge->s3d.pat_fg_clr;
1407 else
1408 mono_pattern[y*8 + x] = virge->s3d.pat_bg_clr;
1409 if (virge->s3d.mono_pat_1 & (1 << (x + y*8)))
1410 mono_pattern[(y+4)*8 + x] = virge->s3d.pat_fg_clr;
1411 else
1412 mono_pattern[(y+4)*8 + x] = virge->s3d.pat_bg_clr;
1413 }
1414 }
1415 }
1416 switch (virge->s3d.cmd_set & CMD_SET_COMMAND_MASK)
1417 {
1418 case CMD_SET_COMMAND_NOP:
1419 break;
1421 case CMD_SET_COMMAND_BITBLT:
1422 if (count == -1)
1423 {
1424 virge->s3d.src_x = virge->s3d.rsrc_x;
1425 virge->s3d.src_y = virge->s3d.rsrc_y;
1426 virge->s3d.dest_x = virge->s3d.rdest_x;
1427 virge->s3d.dest_y = virge->s3d.rdest_y;
1428 virge->s3d.w = virge->s3d.r_width;
1429 virge->s3d.h = virge->s3d.r_height;
1430 virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff;
1431 virge->s3d.data_left_count = 0;
1433 /* pclog("BitBlt start %i,%i %i,%i %i,%i %02X %x %x\n",
1434 virge->s3d.src_x,
1435 virge->s3d.src_y,
1436 virge->s3d.dest_x,
1437 virge->s3d.dest_y,
1438 virge->s3d.w,
1439 virge->s3d.h,
1440 virge->s3d.rop,
1441 virge->s3d.src_base,
1442 virge->s3d.dest_base);*/
1444 if (virge->s3d.cmd_set & CMD_SET_IDS)
1445 return;
1446 }
1447 if (!virge->s3d.h)
1448 return;
1449 while (count)
1450 {
1451 uint32_t src_addr = virge->s3d.src_base + (virge->s3d.src_x * x_mul) + (virge->s3d.src_y * virge->s3d.src_str);
1452 uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str);
1453 uint32_t source, dest, pattern;
1454 uint32_t out = 0;
1455 int update = 1;
1457 switch (virge->s3d.cmd_set & (CMD_SET_MS | CMD_SET_IDS))
1458 {
1459 case 0:
1460 case CMD_SET_MS:
1461 READ(src_addr, source);
1462 if ((virge->s3d.cmd_set & CMD_SET_TP) && source == virge->s3d.src_fg_clr)
1463 update = 0;
1464 break;
1465 case CMD_SET_IDS:
1466 if (virge->s3d.data_left_count)
1467 {
1468 /*Handle shifting for 24-bit data*/
1469 source = virge->s3d.data_left;
1470 source |= ((cpu_dat << virge->s3d.data_left_count) & ~0xff000000);
1471 cpu_dat >>= (cpu_dat_shift - virge->s3d.data_left_count);
1472 count -= (cpu_dat_shift - virge->s3d.data_left_count);
1473 virge->s3d.data_left_count = 0;
1474 if (count < cpu_dat_shift)
1475 {
1476 virge->s3d.data_left = cpu_dat;
1477 virge->s3d.data_left_count = count;
1478 count = 0;
1479 }
1480 }
1481 else
1482 {
1483 source = cpu_dat;
1484 cpu_dat >>= cpu_dat_shift;
1485 count -= cpu_dat_shift;
1486 if (count < cpu_dat_shift)
1487 {
1488 virge->s3d.data_left = cpu_dat;
1489 virge->s3d.data_left_count = count;
1490 count = 0;
1491 }
1492 }
1493 if ((virge->s3d.cmd_set & CMD_SET_TP) && source == virge->s3d.src_fg_clr)
1494 update = 0;
1495 break;
1496 case CMD_SET_IDS | CMD_SET_MS:
1497 source = (cpu_dat & (1 << 31)) ? virge->s3d.src_fg_clr : virge->s3d.src_bg_clr;
1498 if ((virge->s3d.cmd_set & CMD_SET_TP) && !(cpu_dat & (1 << 31)))
1499 update = 0;
1500 cpu_dat <<= 1;
1501 count--;
1502 break;
1503 }
1505 CLIP(virge->s3d.dest_x, virge->s3d.dest_y);
1507 if (update)
1508 {
1509 READ(dest_addr, dest);
1510 pattern = pattern_data[(virge->s3d.dest_y & 7)*8 + (virge->s3d.dest_x & 7)];
1511 MIX();
1513 WRITE(dest_addr, out);
1514 }
1516 virge->s3d.src_x += x_inc;
1517 virge->s3d.dest_x += x_inc;
1518 if (!virge->s3d.w)
1519 {
1520 virge->s3d.src_x = virge->s3d.rsrc_x;
1521 virge->s3d.dest_x = virge->s3d.rdest_x;
1522 virge->s3d.w = virge->s3d.r_width;
1524 virge->s3d.src_y += y_inc;
1525 virge->s3d.dest_y += y_inc;
1526 virge->s3d.h--;
1528 switch (virge->s3d.cmd_set & (CMD_SET_MS | CMD_SET_IDS))
1529 {
1530 case CMD_SET_IDS:
1531 cpu_dat >>= (count - (count & count_mask));
1532 count &= count_mask;
1533 virge->s3d.data_left_count = 0;
1534 break;
1536 case CMD_SET_IDS | CMD_SET_MS:
1537 cpu_dat <<= (count - (count & count_mask));
1538 count &= count_mask;
1539 break;
1540 }
1541 if (!virge->s3d.h)
1542 {
1543 return;
1544 }
1545 }
1546 else
1547 virge->s3d.w--;
1548 }
1549 break;
1551 case CMD_SET_COMMAND_RECTFILL:
1552 /*No source, pattern = pat_fg_clr*/
1553 if (count == -1)
1554 {
1555 virge->s3d.src_x = virge->s3d.rsrc_x;
1556 virge->s3d.src_y = virge->s3d.rsrc_y;
1557 virge->s3d.dest_x = virge->s3d.rdest_x;
1558 virge->s3d.dest_y = virge->s3d.rdest_y;
1559 virge->s3d.w = virge->s3d.r_width;
1560 virge->s3d.h = virge->s3d.r_height;
1561 virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff;
1563 /* pclog("RctFll start %i,%i %i,%i %02X %08x\n", virge->s3d.dest_x,
1564 virge->s3d.dest_y,
1565 virge->s3d.w,
1566 virge->s3d.h,
1567 virge->s3d.rop, virge->s3d.dest_base);*/
1568 }
1570 while (count)
1571 {
1572 uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str);
1573 uint32_t source = 0, dest, pattern = virge->s3d.pat_fg_clr;
1574 uint32_t out = 0;
1575 int update = 1;
1577 CLIP(virge->s3d.dest_x, virge->s3d.dest_y);
1579 if (update)
1580 {
1581 READ(dest_addr, dest);
1583 MIX();
1585 WRITE(dest_addr, out);
1586 }
1588 virge->s3d.src_x += x_inc;
1589 virge->s3d.dest_x += x_inc;
1590 if (!virge->s3d.w)
1591 {
1592 virge->s3d.src_x = virge->s3d.rsrc_x;
1593 virge->s3d.dest_x = virge->s3d.rdest_x;
1594 virge->s3d.w = virge->s3d.r_width;
1596 virge->s3d.src_y += y_inc;
1597 virge->s3d.dest_y += y_inc;
1598 virge->s3d.h--;
1599 if (!virge->s3d.h)
1600 {
1601 return;
1602 }
1603 }
1604 else
1605 virge->s3d.w--;
1606 count--;
1607 }
1608 break;
1610 case CMD_SET_COMMAND_LINE:
1611 if (count == -1)
1612 {
1613 virge->s3d.dest_x = virge->s3d.lxstart;
1614 virge->s3d.dest_y = virge->s3d.lystart;
1615 virge->s3d.h = virge->s3d.lycnt;
1616 virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff;
1617 if (virge->s3d.ldx >= 0)
1618 virge->s3d.dest_x -= virge->s3d.ldx / 2;
1619 else
1620 virge->s3d.dest_x += virge->s3d.ldx / 2;
1621 //virge->s3d.dest_dest_x = virge->s3d.dest_x + virge->s3d.ldx;
1622 }
1623 while (virge->s3d.h)
1624 {
1625 int x = virge->s3d.dest_x >> 20;
1626 int new_x = (virge->s3d.dest_x + virge->s3d.ldx) >> 20;
1628 do
1629 {
1630 uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str);
1631 uint32_t source = 0, dest, pattern;
1632 uint32_t out = 0;
1633 int update = 1;
1635 CLIP(x, virge->s3d.dest_y);
1637 if (update)
1638 {
1639 READ(dest_addr, dest);
1640 pattern = virge->s3d.pat_fg_clr;
1642 MIX();
1644 WRITE(dest_addr, out);
1645 }
1647 if (x < new_x)
1648 x++;
1649 else if (x > new_x)
1650 x--;
1651 } while (x != new_x);
1653 virge->s3d.dest_x += virge->s3d.ldx;
1654 virge->s3d.dest_y--;
1655 virge->s3d.h--;
1656 }
1657 break;
1659 default:
1660 fatal("s3_virge_bitblt : blit command %i %08x\n", (virge->s3d.cmd_set >> 27) & 0xf, virge->s3d.cmd_set);
1661 }
1662 }
/*Unpack an RGB555 pixel `val` into 8-bit r, g, b. Each 5-bit field is
  moved to the top of the byte and its upper three bits are replicated into
  the low bits, so 0x1f expands to 0xff.
  Wrapped in do/while(0) so all three assignments stay under a single
  unbraced if/else; call sites must supply the terminating semicolon.*/
#define RGB15_TO_24(val, r, g, b) \
        do \
        { \
                (b) = (((val) & 0x001f) << 3) | (((val) & 0x001f) >> 2); \
                (g) = (((val) & 0x03e0) >> 2) | (((val) & 0x03e0) >> 7); \
                (r) = (((val) & 0x7c00) >> 7) | (((val) & 0x7c00) >> 12); \
        } while (0)
/*Split a packed 0xRRGGBB pixel `val` into separate 8-bit r, g, b.
  Wrapped in do/while(0) so all three assignments stay under a single
  unbraced if/else; call sites supply the terminating semicolon as before.*/
#define RGB24_TO_24(val, r, g, b) \
        do \
        { \
                (b) = (val) & 0xff; \
                (g) = ((val) & 0xff00) >> 8; \
                (r) = ((val) & 0xff0000) >> 16; \
        } while (0)
/*Pack 8-bit r, g, b into an RGB555 value stored in `dest`.
  When virge->dithering_enabled is set, an offset taken from the 4x4
  `dither` table at (_y & 3, _x & 3) is added to each component before it is
  truncated to 5 bits; components above 248 are pinned to 248 instead.
  Uses `virge`, `dither`, `_x` and `_y` from the expanding scope.
  Arguments are parenthesised and the body is a single do/while(0)
  statement; call sites supply the terminating semicolon as before.*/
#define RGB15(r, g, b, dest) \
        do \
        { \
                if (virge->dithering_enabled) \
                { \
                        int add = dither[_y & 3][_x & 3]; \
                        int _r = ((r) > 248) ? 248 : (r) + add; \
                        int _g = ((g) > 248) ? 248 : (g) + add; \
                        int _b = ((b) > 248) ? 248 : (b) + add; \
                        (dest) = ((_b >> 3) & 0x1f) | (((_g >> 3) & 0x1f) << 5) | (((_r >> 3) & 0x1f) << 10); \
                } \
                else \
                        (dest) = (((b) >> 3) & 0x1f) | ((((g) >> 3) & 0x1f) << 5) | ((((r) >> 3) & 0x1f) << 10); \
        } while (0)
/*Pack separate 8-bit r, g, b components into a 0xRRGGBB value*/
#define RGB24(r, g, b) (((r) << 16) | ((g) << 8) | (b))
/*One colour sample with separate red, green, blue and alpha components.
  The texel readers in this file fill each component with a 0-255 value.*/
typedef struct rgba_t
{
        int r;
        int g;
        int b;
        int a;
} rgba_t;
1691 typedef struct s3d_state_t
1692 {
1693 int32_t r, g, b, a, u, v, d, w;
1695 int32_t base_r, base_g, base_b, base_a, base_u, base_v, base_d, base_w;
1697 uint32_t base_z;
1699 uint32_t tbu, tbv;
1701 uint32_t cmd_set;
1702 int max_d;
1704 uint16_t *texture[10];
1706 uint32_t tex_bdr_clr;
1708 int32_t x1, x2;
1709 int y;
1711 rgba_t dest_rgba;
1712 } s3d_state_t;
/*Per-fetch parameters handed to a texel reader*/
typedef struct s3d_texture_state_t
{
        /*Texture level to read; also the left shift applied to the row
          index when computing a texel offset*/
        int level;
        /*Right shift that turns the masked u/v bits into a texel index*/
        int texture_shift;

        /*Fixed-point texture coordinates of the texel to fetch*/
        int32_t u;
        int32_t v;
} s3d_texture_state_t;
1722 static void (*tex_read)(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out);
1723 static void (*tex_sample)(s3d_state_t *state);
1724 static void (*dest_pixel)(s3d_state_t *state);
/*Larger / smaller of two values. Each argument is evaluated twice, so do
  not pass expressions with side effects.*/
#define MAX(a, b) ((b) < (a) ? (a) : (b))
#define MIN(a, b) ((b) > (a) ? (a) : (b))
/*Pixel coordinates read by the RGB15 macro to index the dither table*/
static int _x;
static int _y;
1731 static void tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1732 {
1733 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1734 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1735 uint16_t val = state->texture[texture_state->level][offset];
1737 out->r = ((val & 0x7c00) >> 7) | ((val & 0x7000) >> 12);
1738 out->g = ((val & 0x03e0) >> 2) | ((val & 0x0380) >> 7);
1739 out->b = ((val & 0x001f) << 3) | ((val & 0x001c) >> 2);
1740 out->a = (val & 0x8000) ? 0xff : 0;
1741 }
1743 static void tex_ARGB1555_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1744 {
1745 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1746 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1747 uint16_t val = state->texture[texture_state->level][offset];
1749 if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000)
1750 val = state->tex_bdr_clr;
1752 out->r = ((val & 0x7c00) >> 7) | ((val & 0x7000) >> 12);
1753 out->g = ((val & 0x03e0) >> 2) | ((val & 0x0380) >> 7);
1754 out->b = ((val & 0x001f) << 3) | ((val & 0x001c) >> 2);
1755 out->a = (val & 0x8000) ? 0xff : 0;
1756 }
1758 static void tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1759 {
1760 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1761 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1762 uint16_t val = state->texture[texture_state->level][offset];
1764 out->r = ((val & 0x0f00) >> 4) | ((val & 0x0f00) >> 8);
1765 out->g = (val & 0x00f0) | ((val & 0x00f0) >> 4);
1766 out->b = ((val & 0x000f) << 4) | (val & 0x000f);
1767 out->a = ((val & 0xf000) >> 8) | ((val & 0xf000) >> 12);
1768 }
1770 static void tex_ARGB4444_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1771 {
1772 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1773 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1774 uint16_t val = state->texture[texture_state->level][offset];
1776 if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000)
1777 val = state->tex_bdr_clr;
1779 out->r = ((val & 0x0f00) >> 4) | ((val & 0x0f00) >> 8);
1780 out->g = (val & 0x00f0) | ((val & 0x00f0) >> 4);
1781 out->b = ((val & 0x000f) << 4) | (val & 0x000f);
1782 out->a = ((val & 0xf000) >> 8) | ((val & 0xf000) >> 12);
1783 }
1785 static void tex_ARGB8888(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1786 {
1787 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1788 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1789 uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset];
1791 out->r = (val >> 16) & 0xff;
1792 out->g = (val >> 8) & 0xff;
1793 out->b = val & 0xff;
1794 out->a = (val >> 24) & 0xff;
1795 }
1796 static void tex_ARGB8888_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1797 {
1798 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1799 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1800 uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset];
1802 if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000)
1803 val = state->tex_bdr_clr;
1805 out->r = (val >> 16) & 0xff;
1806 out->g = (val >> 8) & 0xff;
1807 out->b = val & 0xff;
1808 out->a = (val >> 24) & 0xff;
1809 }
1811 static void tex_sample_normal(s3d_state_t *state)
1812 {
1813 s3d_texture_state_t texture_state;
1815 texture_state.level = state->max_d;
1816 texture_state.texture_shift = 18 + (9 - texture_state.level);
1817 texture_state.u = state->u + state->tbu;
1818 texture_state.v = state->v + state->tbv;
1820 tex_read(state, &texture_state, &state->dest_rgba);
1821 }
1823 static void tex_sample_normal_filter(s3d_state_t *state)
1824 {
1825 s3d_texture_state_t texture_state;
1826 int tex_offset;
1827 rgba_t tex_samples[4];
1828 int du, dv;
1829 int d[4];
1831 texture_state.level = state->max_d;
1832 texture_state.texture_shift = 18 + (9 - texture_state.level);
1833 tex_offset = 1 << texture_state.texture_shift;
1835 texture_state.u = state->u + state->tbu;
1836 texture_state.v = state->v + state->tbv;
1837 tex_read(state, &texture_state, &tex_samples[0]);
1838 du = (texture_state.u >> (texture_state.texture_shift - 8)) & 0xff;
1839 dv = (texture_state.v >> (texture_state.texture_shift - 8)) & 0xff;
1841 texture_state.u = state->u + state->tbu + tex_offset;
1842 texture_state.v = state->v + state->tbv;
1843 tex_read(state, &texture_state, &tex_samples[1]);
1845 texture_state.u = state->u + state->tbu;
1846 texture_state.v = state->v + state->tbv + tex_offset;
1847 tex_read(state, &texture_state, &tex_samples[2]);
1849 texture_state.u = state->u + state->tbu + tex_offset;
1850 texture_state.v = state->v + state->tbv + tex_offset;
1851 tex_read(state, &texture_state, &tex_samples[3]);
1853 d[0] = (256 - du) * (256 - dv);
1854 d[1] = du * (256 - dv);
1855 d[2] = (256 - du) * dv;
1856 d[3] = du * dv;
1858 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
1859 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
1860 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
1861 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
1862 }
1864 static void tex_sample_mipmap(s3d_state_t *state)
1865 {
1866 s3d_texture_state_t texture_state;
1868 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
1869 if (texture_state.level < 0)
1870 texture_state.level = 0;
1871 texture_state.texture_shift = 18 + (9 - texture_state.level);
1872 texture_state.u = state->u + state->tbu;
1873 texture_state.v = state->v + state->tbv;
1875 tex_read(state, &texture_state, &state->dest_rgba);
1876 }
1878 static void tex_sample_mipmap_filter(s3d_state_t *state)
1879 {
1880 s3d_texture_state_t texture_state;
1881 int tex_offset;
1882 rgba_t tex_samples[4];
1883 int du, dv;
1884 int d[4];
1886 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
1887 if (texture_state.level < 0)
1888 texture_state.level = 0;
1889 texture_state.texture_shift = 18 + (9 - texture_state.level);
1890 tex_offset = 1 << texture_state.texture_shift;
1892 texture_state.u = state->u + state->tbu;
1893 texture_state.v = state->v + state->tbv;
1894 tex_read(state, &texture_state, &tex_samples[0]);
1895 du = (texture_state.u >> (texture_state.texture_shift - 8)) & 0xff;
1896 dv = (texture_state.v >> (texture_state.texture_shift - 8)) & 0xff;
1898 texture_state.u = state->u + state->tbu + tex_offset;
1899 texture_state.v = state->v + state->tbv;
1900 tex_read(state, &texture_state, &tex_samples[1]);
1902 texture_state.u = state->u + state->tbu;
1903 texture_state.v = state->v + state->tbv + tex_offset;
1904 tex_read(state, &texture_state, &tex_samples[2]);
1906 texture_state.u = state->u + state->tbu + tex_offset;
1907 texture_state.v = state->v + state->tbv + tex_offset;
1908 tex_read(state, &texture_state, &tex_samples[3]);
1910 d[0] = (256 - du) * (256 - dv);
1911 d[1] = du * (256 - dv);
1912 d[2] = (256 - du) * dv;
1913 d[3] = du * dv;
1915 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
1916 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
1917 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
1918 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
1919 }
1921 static void tex_sample_persp_normal(s3d_state_t *state)
1922 {
1923 s3d_texture_state_t texture_state;
1924 int32_t w = 0;
1926 if (state->w)
1927 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
1929 texture_state.level = state->max_d;
1930 texture_state.texture_shift = 18 + (9 - texture_state.level);
1931 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
1932 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
1934 tex_read(state, &texture_state, &state->dest_rgba);
1935 }
1937 static void tex_sample_persp_normal_filter(s3d_state_t *state)
1938 {
1939 s3d_texture_state_t texture_state;
1940 int32_t w = 0, u, v;
1941 int tex_offset;
1942 rgba_t tex_samples[4];
1943 int du, dv;
1944 int d[4];
1946 if (state->w)
1947 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
1949 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
1950 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
1952 texture_state.level = state->max_d;
1953 texture_state.texture_shift = 18 + (9 - texture_state.level);
1954 tex_offset = 1 << texture_state.texture_shift;
1956 texture_state.u = u;
1957 texture_state.v = v;
1958 tex_read(state, &texture_state, &tex_samples[0]);
1959 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
1960 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
1962 texture_state.u = u + tex_offset;
1963 texture_state.v = v;
1964 tex_read(state, &texture_state, &tex_samples[1]);
1966 texture_state.u = u;
1967 texture_state.v = v + tex_offset;
1968 tex_read(state, &texture_state, &tex_samples[2]);
1970 texture_state.u = u + tex_offset;
1971 texture_state.v = v + tex_offset;
1972 tex_read(state, &texture_state, &tex_samples[3]);
1974 d[0] = (256 - du) * (256 - dv);
1975 d[1] = du * (256 - dv);
1976 d[2] = (256 - du) * dv;
1977 d[3] = du * dv;
1979 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
1980 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
1981 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
1982 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
1983 }
1985 static void tex_sample_persp_normal_375(s3d_state_t *state)
1986 {
1987 s3d_texture_state_t texture_state;
1988 int32_t w = 0;
1990 if (state->w)
1991 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
1993 texture_state.level = state->max_d;
1994 texture_state.texture_shift = 18 + (9 - texture_state.level);
1995 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
1996 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
1998 tex_read(state, &texture_state, &state->dest_rgba);
1999 }
2001 static void tex_sample_persp_normal_filter_375(s3d_state_t *state)
2002 {
2003 s3d_texture_state_t texture_state;
2004 int32_t w = 0, u, v;
2005 int tex_offset;
2006 rgba_t tex_samples[4];
2007 int du, dv;
2008 int d[4];
2010 if (state->w)
2011 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2013 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
2014 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
2016 texture_state.level = state->max_d;
2017 texture_state.texture_shift = 18 + (9 - texture_state.level);
2018 tex_offset = 1 << texture_state.texture_shift;
2020 texture_state.u = u;
2021 texture_state.v = v;
2022 tex_read(state, &texture_state, &tex_samples[0]);
2023 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
2024 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
2026 texture_state.u = u + tex_offset;
2027 texture_state.v = v;
2028 tex_read(state, &texture_state, &tex_samples[1]);
2030 texture_state.u = u;
2031 texture_state.v = v + tex_offset;
2032 tex_read(state, &texture_state, &tex_samples[2]);
2034 texture_state.u = u + tex_offset;
2035 texture_state.v = v + tex_offset;
2036 tex_read(state, &texture_state, &tex_samples[3]);
2038 d[0] = (256 - du) * (256 - dv);
2039 d[1] = du * (256 - dv);
2040 d[2] = (256 - du) * dv;
2041 d[3] = du * dv;
2043 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
2044 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
2045 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
2046 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
2047 }
2050 static void tex_sample_persp_mipmap(s3d_state_t *state)
2051 {
2052 s3d_texture_state_t texture_state;
2053 int32_t w = 0;
2055 if (state->w)
2056 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2058 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
2059 if (texture_state.level < 0)
2060 texture_state.level = 0;
2061 texture_state.texture_shift = 18 + (9 - texture_state.level);
2062 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
2063 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
2065 tex_read(state, &texture_state, &state->dest_rgba);
2066 }
2068 static void tex_sample_persp_mipmap_filter(s3d_state_t *state)
2069 {
2070 s3d_texture_state_t texture_state;
2071 int32_t w = 0, u, v;
2072 int tex_offset;
2073 rgba_t tex_samples[4];
2074 int du, dv;
2075 int d[4];
2077 if (state->w)
2078 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2080 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
2081 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
2083 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
2084 if (texture_state.level < 0)
2085 texture_state.level = 0;
2086 texture_state.texture_shift = 18 + (9 - texture_state.level);
2087 tex_offset = 1 << texture_state.texture_shift;
2089 texture_state.u = u;
2090 texture_state.v = v;
2091 tex_read(state, &texture_state, &tex_samples[0]);
2092 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
2093 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
2095 texture_state.u = u + tex_offset;
2096 texture_state.v = v;
2097 tex_read(state, &texture_state, &tex_samples[1]);
2099 texture_state.u = u;
2100 texture_state.v = v + tex_offset;
2101 tex_read(state, &texture_state, &tex_samples[2]);
2103 texture_state.u = u + tex_offset;
2104 texture_state.v = v + tex_offset;
2105 tex_read(state, &texture_state, &tex_samples[3]);
2107 d[0] = (256 - du) * (256 - dv);
2108 d[1] = du * (256 - dv);
2109 d[2] = (256 - du) * dv;
2110 d[3] = du * dv;
2112 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
2113 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
2114 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
2115 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
2116 }
2118 static void tex_sample_persp_mipmap_375(s3d_state_t *state)
2119 {
2120 s3d_texture_state_t texture_state;
2121 int32_t w = 0;
2123 if (state->w)
2124 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2126 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
2127 if (texture_state.level < 0)
2128 texture_state.level = 0;
2129 texture_state.texture_shift = 18 + (9 - texture_state.level);
2130 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
2131 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
2133 tex_read(state, &texture_state, &state->dest_rgba);
2134 }
2136 static void tex_sample_persp_mipmap_filter_375(s3d_state_t *state)
2137 {
2138 s3d_texture_state_t texture_state;
2139 int32_t w = 0, u, v;
2140 int tex_offset;
2141 rgba_t tex_samples[4];
2142 int du, dv;
2143 int d[4];
2145 if (state->w)
2146 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2148 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
2149 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
2151 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
2152 if (texture_state.level < 0)
2153 texture_state.level = 0;
2154 texture_state.texture_shift = 18 + (9 - texture_state.level);
2155 tex_offset = 1 << texture_state.texture_shift;
2157 texture_state.u = u;
2158 texture_state.v = v;
2159 tex_read(state, &texture_state, &tex_samples[0]);
2160 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
2161 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
2163 texture_state.u = u + tex_offset;
2164 texture_state.v = v;
2165 tex_read(state, &texture_state, &tex_samples[1]);
2167 texture_state.u = u;
2168 texture_state.v = v + tex_offset;
2169 tex_read(state, &texture_state, &tex_samples[2]);
2171 texture_state.u = u + tex_offset;
2172 texture_state.v = v + tex_offset;
2173 tex_read(state, &texture_state, &tex_samples[3]);
2175 d[0] = (256 - du) * (256 - dv);
2176 d[1] = du * (256 - dv);
2177 d[2] = (256 - du) * dv;
2178 d[3] = du * dv;
2180 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
2181 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
2182 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
2183 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
2184 }
/*Saturate an integer lvalue to the range 0..255 in place.
  x is evaluated more than once, so it must not have side effects.*/
#define CLAMP(x) do                             \
        {                                       \
                if ((x) < 0)                    \
                        x = 0;                  \
                else if ((x) > 0xff)            \
                        x = 0xff;               \
        }                                       \
        while (0)
/*Saturate four integer lvalues (red, green, blue, alpha) to 0..255 in place.
  Wrapped in do { } while (0) so the macro expands to a single statement and
  is safe inside an unbraced if/else. Each argument is evaluated more than
  once, so arguments must not have side effects.*/
#define CLAMP_RGBA(r, g, b, a) do               \
        {                                       \
                if ((r) & ~0xff)                \
                        r = ((r) < 0) ? 0 : 0xff; \
                if ((g) & ~0xff)                \
                        g = ((g) < 0) ? 0 : 0xff; \
                if ((b) & ~0xff)                \
                        b = ((b) < 0) ? 0 : 0xff; \
                if ((a) & ~0xff)                \
                        a = ((a) < 0) ? 0 : 0xff; \
        }                                       \
        while (0)
/*Saturate three integer lvalues (red, green, blue) to 0..255 in place.
  Each argument is evaluated more than once, so arguments must not have side
  effects.*/
#define CLAMP_RGB(r, g, b) do                                           \
        {                                                               \
                r = ((r) < 0) ? 0 : (((r) > 0xff) ? 0xff : (r));        \
                g = ((g) < 0) ? 0 : (((g) > 0xff) ? 0xff : (g));        \
                b = ((b) < 0) ? 0 : (((b) > 0xff) ? 0xff : (b));        \
        }                                                               \
        while (0)
2221 static void dest_pixel_gouraud_shaded_triangle(s3d_state_t *state)
2222 {
2223 state->dest_rgba.r = state->r >> 7;
2224 CLAMP(state->dest_rgba.r);
2226 state->dest_rgba.g = state->g >> 7;
2227 CLAMP(state->dest_rgba.g);
2229 state->dest_rgba.b = state->b >> 7;
2230 CLAMP(state->dest_rgba.b);
2232 state->dest_rgba.a = state->a >> 7;
2233 CLAMP(state->dest_rgba.a);
2234 }
2236 static void dest_pixel_unlit_texture_triangle(s3d_state_t *state)
2237 {
2238 tex_sample(state);
2240 if (state->cmd_set & CMD_SET_ABC_SRC)
2241 state->dest_rgba.a = state->a >> 7;
2242 }
2244 static void dest_pixel_lit_texture_decal(s3d_state_t *state)
2245 {
2246 tex_sample(state);
2248 if (state->cmd_set & CMD_SET_ABC_SRC)
2249 state->dest_rgba.a = state->a >> 7;
2250 }
2252 static void dest_pixel_lit_texture_reflection(s3d_state_t *state)
2253 {
2254 tex_sample(state);
2256 state->dest_rgba.r += (state->r >> 7);
2257 state->dest_rgba.g += (state->g >> 7);
2258 state->dest_rgba.b += (state->b >> 7);
2259 if (state->cmd_set & CMD_SET_ABC_SRC)
2260 state->dest_rgba.a += (state->a >> 7);
2262 CLAMP_RGBA(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b, state->dest_rgba.a);
2263 }
2265 static void dest_pixel_lit_texture_modulate(s3d_state_t *state)
2266 {
2267 int r = state->r >> 7, g = state->g >> 7, b = state->b >> 7, a = state->a >> 7;
2269 tex_sample(state);
2271 CLAMP_RGBA(r, g, b, a);
2273 state->dest_rgba.r = ((state->dest_rgba.r) * r) >> 8;
2274 state->dest_rgba.g = ((state->dest_rgba.g) * g) >> 8;
2275 state->dest_rgba.b = ((state->dest_rgba.b) * b) >> 8;
2277 if (state->cmd_set & CMD_SET_ABC_SRC)
2278 state->dest_rgba.a = a;
2279 }
2281 static void tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int32_t dx2)
2282 {
2283 uint8_t *vram = virge->svga.vram;
2285 int x_dir = s3d_tri->tlr ? 1 : -1;
2287 int use_z = !(s3d_tri->cmd_set & CMD_SET_ZB_MODE);
2289 int y_count = yc;
2291 int bpp = (s3d_tri->cmd_set >> 2) & 7;
2293 uint32_t dest_offset, z_offset;
2295 if (s3d_tri->cmd_set & CMD_SET_HC)
2296 {
2297 if (state->y < s3d_tri->clip_t)
2298 return;
2299 if (state->y > s3d_tri->clip_b)
2300 {
2301 int diff_y = state->y - s3d_tri->clip_b;
2303 if (diff_y > y_count)
2304 diff_y = y_count;
2306 state->base_u += (s3d_tri->TdUdY * diff_y);
2307 state->base_v += (s3d_tri->TdVdY * diff_y);
2308 state->base_z += (s3d_tri->TdZdY * diff_y);
2309 state->base_r += (s3d_tri->TdRdY * diff_y);
2310 state->base_g += (s3d_tri->TdGdY * diff_y);
2311 state->base_b += (s3d_tri->TdBdY * diff_y);
2312 state->base_a += (s3d_tri->TdAdY * diff_y);
2313 state->base_d += (s3d_tri->TdDdY * diff_y);
2314 state->base_w += (s3d_tri->TdWdY * diff_y);
2315 state->x1 += (dx1 * diff_y);
2316 state->x2 += (dx2 * diff_y);
2317 state->y -= diff_y;
2318 dest_offset -= s3d_tri->dest_str;
2319 z_offset -= s3d_tri->z_str;
2320 y_count -= diff_y;
2321 }
2322 if ((state->y - y_count) < s3d_tri->clip_t)
2323 y_count = state->y - s3d_tri->clip_t;
2324 }
2326 dest_offset = s3d_tri->dest_base + (state->y * s3d_tri->dest_str);
2327 z_offset = s3d_tri->z_base + (state->y * s3d_tri->z_str);
2329 for (; y_count > 0; y_count--)
2330 {
2331 int x = (state->x1 + ((1 << 20) - 1)) >> 20;
2332 int xe = (state->x2 + ((1 << 20) - 1)) >> 20;
2333 uint32_t z = state->base_z;
2334 if (x_dir < 0)
2335 {
2336 x--;
2337 xe--;
2338 }
2340 if (x != xe && (x_dir > 0 && x < xe) || (x_dir < 0 && x > xe))
2341 {
2342 uint32_t dest_addr, z_addr;
2343 int dx = (x_dir > 0) ? ((31 - ((state->x1-1) >> 15)) & 0x1f) : (((state->x1-1) >> 15) & 0x1f);
2344 int x_offset = x_dir * (bpp + 1);
2345 int xz_offset = x_dir << 1;
2346 if (x_dir > 0)
2347 dx += 1;
2348 state->r = state->base_r + ((s3d_tri->TdRdX * dx) >> 5);
2349 state->g = state->base_g + ((s3d_tri->TdGdX * dx) >> 5);
2350 state->b = state->base_b + ((s3d_tri->TdBdX * dx) >> 5);
2351 state->a = state->base_a + ((s3d_tri->TdAdX * dx) >> 5);
2352 state->u = state->base_u + ((s3d_tri->TdUdX * dx) >> 5);
2353 state->v = state->base_v + ((s3d_tri->TdVdX * dx) >> 5);
2354 state->w = state->base_w + ((s3d_tri->TdWdX * dx) >> 5);
2355 state->d = state->base_d + ((s3d_tri->TdDdX * dx) >> 5);
2356 z += ((s3d_tri->TdZdX * dx) >> 5);
2358 // pclog("Draw Y=%i X=%i to XE=%i %i %08x %08x %08x %08x %08x %08x %08x %08x %i %08x\n", state->y, x, xe, dx, state->x1, state->x2, dx1, virge->s3d.TdWdX, state->u, state->v, virge->s3d.TdUdX, virge->s3d.TdUdY, dx, (virge->s3d.TdUdX * dx) >> 4);
2360 if (s3d_tri->cmd_set & CMD_SET_HC)
2361 {
2362 if (x_dir > 0)
2363 {
2364 if (x > s3d_tri->clip_r)
2365 goto tri_skip_line;
2366 if (xe < s3d_tri->clip_l)
2367 goto tri_skip_line;
2368 if (xe > s3d_tri->clip_r)
2369 xe = s3d_tri->clip_r;
2370 if (x < s3d_tri->clip_l)
2371 {
2372 int diff_x = s3d_tri->clip_l - x;
2374 z += (s3d_tri->TdZdX * diff_x);
2375 state->u += (s3d_tri->TdUdX * diff_x);
2376 state->v += (s3d_tri->TdVdX * diff_x);
2377 state->r += (s3d_tri->TdRdX * diff_x);
2378 state->g += (s3d_tri->TdGdX * diff_x);
2379 state->b += (s3d_tri->TdBdX * diff_x);
2380 state->a += (s3d_tri->TdAdX * diff_x);
2381 state->d += (s3d_tri->TdDdX * diff_x);
2382 state->w += (s3d_tri->TdWdX * diff_x);
2384 x = s3d_tri->clip_l;
2385 }
2386 }
2387 else
2388 {
2389 if (x < s3d_tri->clip_l)
2390 goto tri_skip_line;
2391 if (xe > s3d_tri->clip_r)
2392 goto tri_skip_line;
2393 if (xe < s3d_tri->clip_l)
2394 xe = s3d_tri->clip_l;
2395 if (x > s3d_tri->clip_r)
2396 {
2397 int diff_x = x - s3d_tri->clip_r;
2399 z += (s3d_tri->TdZdX * diff_x);
2400 state->u += (s3d_tri->TdUdX * diff_x);
2401 state->v += (s3d_tri->TdVdX * diff_x);
2402 state->r += (s3d_tri->TdRdX * diff_x);
2403 state->g += (s3d_tri->TdGdX * diff_x);
2404 state->b += (s3d_tri->TdBdX * diff_x);
2405 state->a += (s3d_tri->TdAdX * diff_x);
2406 state->d += (s3d_tri->TdDdX * diff_x);
2407 state->w += (s3d_tri->TdWdX * diff_x);
2409 x = s3d_tri->clip_r;
2410 }
2411 }
2412 }
2414 virge->svga.changedvram[(dest_offset & 0x3fffff) >> 12] = changeframecount;
2416 dest_addr = dest_offset + (x * (bpp + 1));
2417 z_addr = z_offset + (x << 1);
2419 for (; x != xe; x = (x + x_dir) & 0xfff)
2420 {
2421 int update = 1;
2422 int16_t src_z;
2423 _x = x; _y = state->y;
2425 if (use_z)
2426 {
2427 src_z = Z_READ(z_addr);
2428 Z_CLIP(src_z, z >> 16);
2429 }
2431 if (update)
2432 {
2433 uint32_t dest_col;
2435 dest_pixel(state);
2437 if (s3d_tri->cmd_set & CMD_SET_ABC_ENABLE)
2438 {
2439 uint32_t src_col;
2440 int src_r, src_g, src_b;
2442 switch (bpp)
2443 {
2444 case 0: /*8 bpp*/
2445 /*Not implemented yet*/
2446 break;
2447 case 1: /*16 bpp*/
2448 src_col = *(uint16_t *)&vram[dest_addr & 0x3fffff];
2449 RGB15_TO_24(src_col, src_r, src_g, src_b);
2450 break;
2451 case 2: /*24 bpp*/
2452 src_col = (*(uint32_t *)&vram[dest_addr & 0x3fffff]) & 0xffffff;
2453 RGB24_TO_24(src_col, src_r, src_g, src_b);
2454 break;
2455 }
2457 state->dest_rgba.r = ((state->dest_rgba.r * state->dest_rgba.a) + (src_r * (255 - state->dest_rgba.a))) / 255;
2458 state->dest_rgba.g = ((state->dest_rgba.g * state->dest_rgba.a) + (src_g * (255 - state->dest_rgba.a))) / 255;
2459 state->dest_rgba.b = ((state->dest_rgba.b * state->dest_rgba.a) + (src_b * (255 - state->dest_rgba.a))) / 255;
2460 }
2462 switch (bpp)
2463 {
2464 case 0: /*8 bpp*/
2465 /*Not implemented yet*/
2466 break;
2467 case 1: /*16 bpp*/
2468 RGB15(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b, dest_col);
2469 *(uint16_t *)&vram[dest_addr] = dest_col;
2470 break;
2471 case 2: /*24 bpp*/
2472 dest_col = RGB24(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b);
2473 *(uint8_t *)&vram[dest_addr] = dest_col & 0xff;
2474 *(uint8_t *)&vram[dest_addr + 1] = (dest_col >> 8) & 0xff;
2475 *(uint8_t *)&vram[dest_addr + 2] = (dest_col >> 16) & 0xff;
2476 break;
2477 }
2479 if (use_z && (s3d_tri->cmd_set & CMD_SET_ZUP))
2480 Z_WRITE(z_addr, src_z);
2481 }
2483 z += s3d_tri->TdZdX;
2484 state->u += s3d_tri->TdUdX;
2485 state->v += s3d_tri->TdVdX;
2486 state->r += s3d_tri->TdRdX;
2487 state->g += s3d_tri->TdGdX;
2488 state->b += s3d_tri->TdBdX;
2489 state->a += s3d_tri->TdAdX;
2490 state->d += s3d_tri->TdDdX;
2491 state->w += s3d_tri->TdWdX;
2492 dest_addr += x_offset;
2493 z_addr += xz_offset;
2494 virge->pixel_count++;
2495 }
2496 }
2497 tri_skip_line:
2498 state->x1 += dx1;
2499 state->x2 += dx2;
2500 state->base_u += s3d_tri->TdUdY;
2501 state->base_v += s3d_tri->TdVdY;
2502 state->base_z += s3d_tri->TdZdY;
2503 state->base_r += s3d_tri->TdRdY;
2504 state->base_g += s3d_tri->TdGdY;
2505 state->base_b += s3d_tri->TdBdY;
2506 state->base_a += s3d_tri->TdAdY;
2507 state->base_d += s3d_tri->TdDdY;
2508 state->base_w += s3d_tri->TdWdY;
2509 state->y--;
2510 dest_offset -= s3d_tri->dest_str;
2511 z_offset -= s3d_tri->z_str;
2512 }
2513 }
/*Texel size per texture format, indexed by bits 5-7 of cmd_set and stored as
  bytes-per-texel times two (the mip level walk in s3_virge_triangle halves
  the product again).*/
static int tex_size[8] =
{
        8,      /*format 0 (read as ARGB8888)*/
        4,      /*format 1 (read as ARGB4444)*/
        4,      /*format 2 (read as ARGB1555)*/
        2,
        2,
        2,
        2,
        2
};
2527 static void s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri)
2528 {
2529 s3d_state_t state;
2531 uint32_t tex_base;
2532 int c;
2534 uint64_t start_time = timer_read();
2535 uint64_t end_time;
2537 state.tbu = s3d_tri->tbu << 11;
2538 state.tbv = s3d_tri->tbv << 11;
2540 state.max_d = (s3d_tri->cmd_set >> 8) & 15;
2542 state.tex_bdr_clr = s3d_tri->tex_bdr_clr;
2544 state.cmd_set = s3d_tri->cmd_set;
2546 state.base_u = s3d_tri->tus;
2547 state.base_v = s3d_tri->tvs;
2548 state.base_z = s3d_tri->tzs;
2549 state.base_r = (int32_t)s3d_tri->trs;
2550 state.base_g = (int32_t)s3d_tri->tgs;
2551 state.base_b = (int32_t)s3d_tri->tbs;
2552 state.base_a = (int32_t)s3d_tri->tas;
2553 state.base_d = s3d_tri->tds;
2554 state.base_w = s3d_tri->tws;
2556 tex_base = s3d_tri->tex_base;
2557 for (c = 9; c >= 0; c--)
2558 {
2559 state.texture[c] = (uint16_t *)&virge->svga.vram[tex_base];
2560 if (c <= state.max_d)
2561 tex_base += ((1 << (c*2)) * tex_size[(s3d_tri->cmd_set >> 5) & 7]) / 2;
2562 }
2564 switch ((s3d_tri->cmd_set >> 27) & 0xf)
2565 {
2566 case 0:
2567 dest_pixel = dest_pixel_gouraud_shaded_triangle;
2568 // pclog("dest_pixel_gouraud_shaded_triangle\n");
2569 break;
2570 case 1:
2571 case 5:
2572 switch ((s3d_tri->cmd_set >> 15) & 0x3)
2573 {
2574 case 0:
2575 dest_pixel = dest_pixel_lit_texture_reflection;
2576 // pclog("dest_pixel_lit_texture_reflection\n");
2577 break;
2578 case 1:
2579 dest_pixel = dest_pixel_lit_texture_modulate;
2580 // pclog("dest_pixel_lit_texture_modulate\n");
2581 break;
2582 case 2:
2583 dest_pixel = dest_pixel_lit_texture_decal;
2584 // pclog("dest_pixel_lit_texture_decal\n");
2585 break;
2586 default:
2587 pclog("bad triangle type %x\n", (s3d_tri->cmd_set >> 27) & 0xf);
2588 return;
2589 }
2590 break;
2591 case 2:
2592 case 6:
2593 dest_pixel = dest_pixel_unlit_texture_triangle;
2594 // pclog("dest_pixel_unlit_texture_triangle\n");
2595 break;
2596 default:
2597 pclog("bad triangle type %x\n", (s3d_tri->cmd_set >> 27) & 0xf);
2598 return;
2599 }
2601 switch (((s3d_tri->cmd_set >> 12) & 7) | ((s3d_tri->cmd_set & (1 << 29)) ? 8 : 0))
2602 {
2603 case 0: case 1:
2604 tex_sample = tex_sample_mipmap;
2605 // pclog("use tex_sample_mipmap\n");
2606 break;
2607 case 2: case 3:
2608 tex_sample = virge->bilinear_enabled ? tex_sample_mipmap_filter : tex_sample_mipmap;
2609 // pclog("use tex_sample_mipmap_filter\n");
2610 break;
2611 case 4: case 5:
2612 tex_sample = tex_sample_normal;
2613 // pclog("use tex_sample_normal\n");
2614 break;
2615 case 6: case 7:
2616 tex_sample = virge->bilinear_enabled ? tex_sample_normal_filter : tex_sample_normal;
2617 // pclog("use tex_sample_normal_filter\n");
2618 break;
2619 case (0 | 8): case (1 | 8):
2620 if (virge->is_375)
2621 tex_sample = tex_sample_persp_mipmap_375;
2622 else
2623 tex_sample = tex_sample_persp_mipmap;
2624 // pclog("use tex_sample_persp_mipmap\n");
2625 break;
2626 case (2 | 8): case (3 | 8):
2627 if (virge->is_375)
2628 tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter_375 : tex_sample_persp_mipmap_375;
2629 else
2630 tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter : tex_sample_persp_mipmap;
2631 // pclog("use tex_sample_persp_mipmap_filter\n");
2632 break;
2633 case (4 | 8): case (5 | 8):
2634 if (virge->is_375)
2635 tex_sample = tex_sample_persp_normal_375;
2636 else
2637 tex_sample = tex_sample_persp_normal;
2638 // pclog("use tex_sample_persp_normal\n");
2639 break;
2640 case (6 | 8): case (7 | 8):
2641 if (virge->is_375)
2642 tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter_375 : tex_sample_persp_normal_375;
2643 else
2644 tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter : tex_sample_persp_normal;
2645 // pclog("use tex_sample_persp_normal_filter\n");
2646 break;
2647 }
2649 switch ((s3d_tri->cmd_set >> 5) & 7)
2650 {
2651 case 0:
2652 tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB8888 : tex_ARGB8888_nowrap;
2653 break;
2654 case 1:
2655 tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB4444 : tex_ARGB4444_nowrap;
2656 // pclog("tex_ARGB4444\n");
2657 break;
2658 case 2:
2659 tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap;
2660 // pclog("tex_ARGB1555 %i\n", (s3d_tri->cmd_set >> 5) & 7);
2661 break;
2662 default:
2663 pclog("bad texture type %i\n", (s3d_tri->cmd_set >> 5) & 7);
2664 tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap;
2665 break;
2666 }
2668 // pclog("Triangle %i %i,%i to %i,%i %08x\n", y, x1 >> 20, y, s3d_tri->txend01 >> 20, y - (s3d_tri->ty01 + s3d_tri->ty12), state.cmd_set);
2670 state.y = s3d_tri->tys;
2671 state.x1 = s3d_tri->txs;
2672 state.x2 = s3d_tri->txend01;
2673 tri(virge, s3d_tri, &state, s3d_tri->ty01, s3d_tri->TdXdY02, s3d_tri->TdXdY01);
2674 state.x2 = s3d_tri->txend12;
2675 tri(virge, s3d_tri, &state, s3d_tri->ty12, s3d_tri->TdXdY02, s3d_tri->TdXdY12);
2677 virge->tri_count++;
2679 end_time = timer_read();
2681 virge_time += end_time - start_time;
2682 }
2684 static void render_thread(void *param)
2685 {
2686 virge_t *virge = (virge_t *)param;
2688 while (1)
2689 {
2690 thread_wait_event(virge->wake_render_thread, -1);
2691 thread_reset_event(virge->wake_render_thread);
2692 virge->s3d_busy = 1;
2693 while (!RB_EMPTY)
2694 {
2695 s3_virge_triangle(virge, &virge->s3d_buffer[virge->s3d_read_idx & RB_MASK]);
2696 virge->s3d_read_idx++;
2698 if (RB_ENTRIES == RB_SIZE - 1)
2699 thread_set_event(virge->not_full_event);
2700 }
2701 virge->s3d_busy = 0;
2702 }
2703 }
2705 static void queue_triangle(virge_t *virge)
2706 {
2707 int c;
2708 // pclog("queue_triangle: read=%i write=%i RB_ENTRIES=%i RB_FULL=%i\n", virge->s3d_read_idx, virge->s3d_write_idx, RB_ENTRIES, RB_FULL);
2709 if (RB_FULL)
2710 {
2711 thread_reset_event(virge->not_full_event);
2712 if (RB_FULL)
2713 thread_wait_event(virge->not_full_event, -1); /*Wait for room in ringbuffer*/
2714 }
2715 // pclog(" add at read=%i write=%i %i\n", virge->s3d_read_idx, virge->s3d_write_idx, virge->s3d_write_idx & RB_MASK);
2716 virge->s3d_buffer[virge->s3d_write_idx & RB_MASK] = virge->s3d_tri;
2717 virge->s3d_write_idx++;
2718 if (!virge->s3d_busy)
2719 thread_set_event(virge->wake_render_thread); /*Wake up render thread if moving from idle*/
2720 }
2722 static void s3_virge_hwcursor_draw(svga_t *svga, int displine)
2723 {
2724 int x;
2725 uint16_t dat[2];
2726 int xx;
2727 int offset = svga->hwcursor_latch.x - svga->hwcursor_latch.xoff;
2729 // pclog("HWcursor %i %i\n", svga->hwcursor_latch.x, svga->hwcursor_latch.y);
2730 for (x = 0; x < 64; x += 16)
2731 {
2732 dat[0] = (svga->vram[svga->hwcursor_latch.addr] << 8) | svga->vram[svga->hwcursor_latch.addr + 1];
2733 dat[1] = (svga->vram[svga->hwcursor_latch.addr + 2] << 8) | svga->vram[svga->hwcursor_latch.addr + 3];
2734 for (xx = 0; xx < 16; xx++)
2735 {
2736 if (offset >= svga->hwcursor_latch.x)
2737 {
2738 if (!(dat[0] & 0x8000))
2739 ((uint32_t *)buffer32->line[displine])[offset + 32] = (dat[1] & 0x8000) ? 0xffffff : 0;
2740 else if (dat[1] & 0x8000)
2741 ((uint32_t *)buffer32->line[displine])[offset + 32] ^= 0xffffff;
2742 // pclog("Plot %i, %i (%i %i) %04X %04X\n", offset, displine, x+xx, svga->hwcursor_on, dat[0], dat[1]);
2743 }
2745 offset++;
2746 dat[0] <<= 1;
2747 dat[1] <<= 1;
2748 }
2749 svga->hwcursor_latch.addr += 4;
2750 }
2751 }
/*Decode four YCbCr pixels (two groups of Y, Cr, Y, Cb bytes) from src into
  r[]/g[]/b[] starting at x_write. Advances src by 8 bytes and x_write by 4
  (modulo 8). Expects src, r, g, b and x_write to be in scope at the point of
  use.*/
#define DECODE_YCbCr()                                                  \
        do                                                              \
        {                                                               \
                int pair;                                               \
                                                                        \
                for (pair = 0; pair < 2; pair++)                        \
                {                                                       \
                        uint8_t luma[2];                                \
                        int8_t Cr, Cb;                                  \
                        int dR, dG, dB;                                 \
                        int k;                                          \
                                                                        \
                        luma[0] = src[0];                               \
                        Cr = src[1] - 0x80;                             \
                        luma[1] = src[2];                               \
                        Cb = src[3] - 0x80;                             \
                        src += 4;                                       \
                                                                        \
                        dR = (359*Cr) >> 8;                             \
                        dG = (88*Cb + 183*Cr) >> 8;                     \
                        dB = (453*Cb) >> 8;                             \
                                                                        \
                        for (k = 0; k < 2; k++)                         \
                        {                                               \
                                r[x_write+k] = luma[k] + dR;            \
                                CLAMP(r[x_write+k]);                    \
                                g[x_write+k] = luma[k] - dG;            \
                                CLAMP(g[x_write+k]);                    \
                                b[x_write+k] = luma[k] + dB;            \
                                CLAMP(b[x_write+k]);                    \
                        }                                               \
                                                                        \
                        x_write = (x_write + 2) & 7;                    \
                }                                                       \
        } while (0)
/*Both YUV formats are untested*/
/*Decode four YUV211 pixels (bytes U, Y1, Y2, V, Y3, Y4) from src into
  r[]/g[]/b[] starting at x_write. Advances src by 6 bytes and x_write by 4
  (modulo 8). Expects src, r, g, b and x_write to be in scope at the point of
  use.
  The third and fourth pixels use their own luma samples (y3, y4), and luma is
  kept in an int so that the range-expanded value (which can fall outside
  0..255) reaches CLAMP without wrapping.*/
#define DECODE_YUV211()                                         \
        do                                                      \
        {                                                       \
                int y1, y2, y3, y4;                             \
                int8_t U, V;                                    \
                int dR, dG, dB;                                 \
                                                                \
                U = src[0] - 0x80;                              \
                y1 = (298 * (src[1] - 16)) >> 8;                \
                y2 = (298 * (src[2] - 16)) >> 8;                \
                V = src[3] - 0x80;                              \
                y3 = (298 * (src[4] - 16)) >> 8;                \
                y4 = (298 * (src[5] - 16)) >> 8;                \
                src += 6;                                       \
                                                                \
                dR = (309*V) >> 8;                              \
                dG = (100*U + 208*V) >> 8;                      \
                dB = (516*U) >> 8;                              \
                                                                \
                r[x_write] = y1 + dR;                           \
                CLAMP(r[x_write]);                              \
                g[x_write] = y1 - dG;                           \
                CLAMP(g[x_write]);                              \
                b[x_write] = y1 + dB;                           \
                CLAMP(b[x_write]);                              \
                                                                \
                r[x_write+1] = y2 + dR;                         \
                CLAMP(r[x_write+1]);                            \
                g[x_write+1] = y2 - dG;                         \
                CLAMP(g[x_write+1]);                            \
                b[x_write+1] = y2 + dB;                         \
                CLAMP(b[x_write+1]);                            \
                                                                \
                r[x_write+2] = y3 + dR;                         \
                CLAMP(r[x_write+2]);                            \
                g[x_write+2] = y3 - dG;                         \
                CLAMP(g[x_write+2]);                            \
                b[x_write+2] = y3 + dB;                         \
                CLAMP(b[x_write+2]);                            \
                                                                \
                r[x_write+3] = y4 + dR;                         \
                CLAMP(r[x_write+3]);                            \
                g[x_write+3] = y4 - dG;                         \
                CLAMP(g[x_write+3]);                            \
                b[x_write+3] = y4 + dB;                         \
                CLAMP(b[x_write+3]);                            \
                                                                \
                x_write = (x_write + 4) & 7;                    \
        } while (0)
/*Decode four YUV422 pixels (two groups of U, Y1, V, Y2 bytes) from src into
  r[]/g[]/b[] starting at x_write. Advances src by 8 bytes and x_write by 4
  (modulo 8). Expects src, r, g, b and x_write to be in scope at the point of
  use. This format is marked as untested in this file.
  Luma is kept in an int so that the range-expanded value (which can fall
  outside 0..255) reaches CLAMP without wrapping.*/
#define DECODE_YUV422()                                         \
        do                                                      \
        {                                                       \
                int c;                                          \
                                                                \
                for (c = 0; c < 2; c++)                         \
                {                                               \
                        int y1, y2;                             \
                        int8_t U, V;                            \
                        int dR, dG, dB;                         \
                                                                \
                        U = src[0] - 0x80;                      \
                        y1 = (298 * (src[1] - 16)) >> 8;        \
                        V = src[2] - 0x80;                      \
                        y2 = (298 * (src[3] - 16)) >> 8;        \
                        src += 4;                               \
                                                                \
                        dR = (309*V) >> 8;                      \
                        dG = (100*U + 208*V) >> 8;              \
                        dB = (516*U) >> 8;                      \
                                                                \
                        r[x_write] = y1 + dR;                   \
                        CLAMP(r[x_write]);                      \
                        g[x_write] = y1 - dG;                   \
                        CLAMP(g[x_write]);                      \
                        b[x_write] = y1 + dB;                   \
                        CLAMP(b[x_write]);                      \
                                                                \
                        r[x_write+1] = y2 + dR;                 \
                        CLAMP(r[x_write+1]);                    \
                        g[x_write+1] = y2 - dG;                 \
                        CLAMP(g[x_write+1]);                    \
                        b[x_write+1] = y2 + dB;                 \
                        CLAMP(b[x_write+1]);                    \
                                                                \
                        x_write = (x_write + 2) & 7;            \
                }                                               \
        } while (0)
/*Decode four 15-bit pixels (16-bit words, top bit ignored) from src into
  r[]/g[]/b[] starting at x_write. r[] takes the lowest five bits, b[] the
  highest; each 5-bit field is widened to 8 bits by replicating its top bits.
  Advances src by 8 bytes and x_write by 4 (modulo 8). Expects src, r, g, b
  and x_write to be in scope at the point of use.*/
#define DECODE_RGB555()                                                 \
        do                                                              \
        {                                                               \
                int i;                                                  \
                                                                        \
                for (i = 0; i < 4; i++)                                 \
                {                                                       \
                        uint16_t word = *(uint16_t *)src;               \
                        int lo5 = word & 0x1f;                          \
                        int mid5 = (word >> 5) & 0x1f;                  \
                        int hi5 = (word >> 10) & 0x1f;                  \
                                                                        \
                        src += 2;                                       \
                                                                        \
                        r[x_write + i] = (lo5 << 3) | (lo5 >> 2);       \
                        g[x_write + i] = (mid5 << 3) | (mid5 >> 2);     \
                        b[x_write + i] = (hi5 << 3) | (hi5 >> 2);       \
                }                                                       \
                x_write = (x_write + 4) & 7;                            \
        } while (0)
/*Decode four 16-bit 5-6-5 pixels from src into r[]/g[]/b[] starting at
  x_write. r[] takes the lowest five bits, g[] the middle six, b[] the highest
  five; each field is widened to 8 bits by replicating its top bits. Advances
  src by 8 bytes and x_write by 4 (modulo 8). Expects src, r, g, b and x_write
  to be in scope at the point of use.*/
#define DECODE_RGB565()                                                 \
        do                                                              \
        {                                                               \
                int i;                                                  \
                                                                        \
                for (i = 0; i < 4; i++)                                 \
                {                                                       \
                        uint16_t word = *(uint16_t *)src;               \
                        int lo5 = word & 0x1f;                          \
                        int mid6 = (word >> 5) & 0x3f;                  \
                        int hi5 = (word >> 11) & 0x1f;                  \
                                                                        \
                        src += 2;                                       \
                                                                        \
                        r[x_write + i] = (lo5 << 3) | (lo5 >> 2);       \
                        g[x_write + i] = (mid6 << 2) | (mid6 >> 4);     \
                        b[x_write + i] = (hi5 << 3) | (hi5 >> 2);       \
                }                                                       \
                x_write = (x_write + 4) & 7;                            \
        } while (0)
/*Decode four packed 24-bit pixels from src into r[]/g[]/b[] starting at
  x_write; the three bytes of each pixel go to r[], g[] and b[] in memory
  order. Advances src by 12 bytes and x_write by 4 (modulo 8). Expects src, r,
  g, b and x_write to be in scope at the point of use.*/
#define DECODE_RGB888()                                 \
        do                                              \
        {                                               \
                int i;                                  \
                                                        \
                for (i = 0; i < 4; i++, src += 3)       \
                {                                       \
                        r[x_write + i] = src[0];        \
                        g[x_write + i] = src[1];        \
                        b[x_write + i] = src[2];        \
                }                                       \
                x_write = (x_write + 4) & 7;            \
        } while (0)
/*Decode four 32-bit pixels from src into r[]/g[]/b[] starting at x_write;
  the first three bytes of each pixel go to r[], g[] and b[] in memory order
  and the fourth byte is ignored. Advances src by 16 bytes and x_write by 4
  (modulo 8). Expects src, r, g, b and x_write to be in scope at the point of
  use.*/
#define DECODE_XRGB8888()                               \
        do                                              \
        {                                               \
                int i;                                  \
                                                        \
                for (i = 0; i < 4; i++, src += 4)       \
                {                                       \
                        r[x_write + i] = src[0];        \
                        g[x_write + i] = src[1];        \
                        b[x_write + i] = src[2];        \
                }                                       \
                x_write = (x_write + 4) & 7;            \
        } while (0)
/*Decode the next four overlay pixels from src using the decoder selected by
  virge->streams.sdif. Any value without its own decoder (including 7) is
  treated as XRGB8888. Expects virge, src, r, g, b and x_write to be in scope
  at the point of use.*/
#define OVERLAY_SAMPLE()                                \
        do                                              \
        {                                               \
                switch (virge->streams.sdif)            \
                {                                       \
                        /*YUV family*/                  \
                        case 1:                         \
                        DECODE_YCbCr();                 \
                        break;                          \
                        case 2:                         \
                        DECODE_YUV422();                \
                        break;                          \
                        case 4:                         \
                        DECODE_YUV211();                \
                        break;                          \
                                                        \
                        /*RGB family*/                  \
                        case 3:                         \
                        DECODE_RGB555();                \
                        break;                          \
                        case 5:                         \
                        DECODE_RGB565();                \
                        break;                          \
                        case 6:                         \
                        DECODE_RGB888();                \
                        break;                          \
                        default:                        \
                        DECODE_XRGB8888();              \
                        break;                          \
                }                                       \
        } while (0)
2980 static void s3_virge_overlay_draw(svga_t *svga, int displine)
2981 {
2982 virge_t *virge = (virge_t *)svga->p;
2983 int offset = (virge->streams.sec_x - virge->streams.pri_x) + 1;
2984 int h_acc = virge->streams.dda_horiz_accumulator;
2985 int r[8], g[8], b[8];
2986 int r_samp[8] = {0, 0, 0, 0, 0, 0, 0, 0};
2987 int g_samp[8] = {0, 0, 0, 0, 0, 0, 0, 0};
2988 int b_samp[8] = {0, 0, 0, 0, 0, 0, 0, 0};
2989 int x_size, x_read = 4, x_write = 4;
2990 int x;
2991 uint32_t *p;
2992 uint8_t *src = &svga->vram[svga->overlay_latch.addr];
2994 p = &((uint32_t *)buffer32->line[displine])[offset + 32];
2996 if ((offset + virge->streams.sec_w) > virge->streams.pri_w)
2997 x_size = (virge->streams.pri_w - virge->streams.sec_x) + 1;
2998 else
2999 x_size = virge->streams.sec_w + 1;
3001 OVERLAY_SAMPLE();
3003 for (x = 0; x < x_size; x++)
3004 {
3005 *p++ = r[x_read] | (g[x_read] << 8) | (b[x_read] << 16);
3007 h_acc += virge->streams.k1_horiz_scale;
3008 if (h_acc >= 0)
3009 {
3010 if ((x_read ^ (x_read + 1)) & ~3)
3011 OVERLAY_SAMPLE();
3012 x_read = (x_read + 1) & 7;
3014 h_acc += (virge->streams.k2_horiz_scale - virge->streams.k1_horiz_scale);
3015 }
3016 }
3018 svga->overlay_latch.v_acc += virge->streams.k1_vert_scale;
3019 if (svga->overlay_latch.v_acc >= 0)
3020 {
3021 svga->overlay_latch.v_acc += (virge->streams.k2_vert_scale - virge->streams.k1_vert_scale);
3022 svga->overlay_latch.addr += virge->streams.sec_stride;
3023 }
3024 }
3026 static uint8_t s3_virge_pci_read(int func, int addr, void *p)
3027 {
3028 virge_t *virge = (virge_t *)p;
3029 svga_t *svga = &virge->svga;
3030 uint8_t ret = 0;
3031 // pclog("S3 PCI read %08X ", addr);
3032 switch (addr)
3033 {
3034 case 0x00: ret = 0x33; break; /*'S3'*/
3035 case 0x01: ret = 0x53; break;
3037 case 0x02: ret = virge->virge_id_low; break;
3038 case 0x03: ret = virge->virge_id_high; break;
3040 case 0x04: ret = virge->pci_regs[0x04] & 0x27; break;
3042 case 0x07: ret = virge->pci_regs[0x07] & 0x36; break;
3044 case 0x08: ret = 0; break; /*Revision ID*/
3045 case 0x09: ret = 0; break; /*Programming interface*/
3047 case 0x0a: ret = 0x00; break; /*Supports VGA interface*/
3048 case 0x0b: ret = 0x03; /*output = 3; */break;
3050 case 0x0d: ret = virge->pci_regs[0x0d] & 0xf8; break;
3052 case 0x10: ret = 0x00; break;/*Linear frame buffer address*/
3053 case 0x11: ret = 0x00; break;
3054 case 0x12: ret = 0x00; break;
3055 case 0x13: ret = svga->crtc[0x59] & 0xfc; break;
3057 case 0x30: ret = virge->pci_regs[0x30] & 0x01; break; /*BIOS ROM address*/
3058 case 0x31: ret = 0x00; break;
3059 case 0x32: ret = virge->pci_regs[0x32]; break;
3060 case 0x33: ret = virge->pci_regs[0x33]; break;
3062 case 0x3c: ret = virge->pci_regs[0x3c]; break;
3064 case 0x3d: ret = 0x01; break; /*INTA*/
3066 case 0x3e: ret = 0x04; break;
3067 case 0x3f: ret = 0xff; break;
3069 }
3070 // pclog("%02X\n", ret);
3071 return ret;
3072 }
3074 static void s3_virge_pci_write(int func, int addr, uint8_t val, void *p)
3075 {
3076 virge_t *virge = (virge_t *)p;
3077 svga_t *svga = &virge->svga;
3078 // pclog("S3 PCI write %08X %02X %04X:%08X\n", addr, val, CS, pc);
3079 switch (addr)
3080 {
3081 case 0x00: case 0x01: case 0x02: case 0x03:
3082 case 0x08: case 0x09: case 0x0a: case 0x0b:
3083 case 0x3d: case 0x3e: case 0x3f:
3084 return;
3086 case PCI_REG_COMMAND:
3087 if (val & PCI_COMMAND_IO)
3088 {
3089 io_removehandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3090 io_sethandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3091 }
3092 else
3093 io_removehandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3094 virge->pci_regs[PCI_REG_COMMAND] = val & 0x27;
3095 return;
3096 case 0x07:
3097 virge->pci_regs[0x07] = val & 0x3e;
3098 return;
3099 case 0x0d:
3100 virge->pci_regs[0x0d] = val & 0xf8;
3101 return;
3103 case 0x13:
3104 svga->crtc[0x59] = val & 0xfc;
3105 s3_virge_updatemapping(virge);
3106 return;
3108 case 0x30: case 0x32: case 0x33:
3109 virge->pci_regs[addr] = val;
3110 if (virge->pci_regs[0x30] & 0x01)
3111 {
3112 uint32_t addr = (virge->pci_regs[0x32] << 16) | (virge->pci_regs[0x33] << 24);
3113 // pclog("Virge bios_rom enabled at %08x\n", addr);
3114 mem_mapping_set_addr(&virge->bios_rom.mapping, addr, 0x8000);
3115 mem_mapping_enable(&virge->bios_rom.mapping);
3116 }
3117 else
3118 {
3119 // pclog("Virge bios_rom disabled\n");
3120 mem_mapping_disable(&virge->bios_rom.mapping);
3121 }
3122 return;
3123 case 0x3c:
3124 virge->pci_regs[0x3c] = val;
3125 return;
3126 }
3127 }
3129 static void *s3_virge_init()
3130 {
3131 virge_t *virge = malloc(sizeof(virge_t));
3132 memset(virge, 0, sizeof(virge_t));
3134 virge->bilinear_enabled = device_get_config_int("bilinear");
3135 virge->dithering_enabled = device_get_config_int("dithering");
3136 virge->memory_size = device_get_config_int("memory");
3138 svga_init(&virge->svga, virge, virge->memory_size << 20,
3139 s3_virge_recalctimings,
3140 s3_virge_in, s3_virge_out,
3141 s3_virge_hwcursor_draw,
3142 s3_virge_overlay_draw);
3144 rom_init(&virge->bios_rom, "roms/s3virge.bin", 0xc0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL);
3145 if (PCI)
3146 mem_mapping_disable(&virge->bios_rom.mapping);
3148 mem_mapping_add(&virge->mmio_mapping, 0, 0, s3_virge_mmio_read,
3149 s3_virge_mmio_read_w,
3150 s3_virge_mmio_read_l,
3151 s3_virge_mmio_write,
3152 s3_virge_mmio_write_w,
3153 s3_virge_mmio_write_l,
3154 NULL,
3155 0,
3156 virge);
3157 mem_mapping_add(&virge->new_mmio_mapping, 0, 0, s3_virge_mmio_read,
3158 s3_virge_mmio_read_w,
3159 s3_virge_mmio_read_l,
3160 s3_virge_mmio_write,
3161 s3_virge_mmio_write_w,
3162 s3_virge_mmio_write_l,
3163 NULL,
3164 0,
3165 virge);
3166 mem_mapping_add(&virge->linear_mapping, 0, 0, svga_read_linear,
3167 svga_readw_linear,
3168 svga_readl_linear,
3169 svga_write_linear,
3170 svga_writew_linear,
3171 svga_writel_linear,
3172 NULL,
3173 0,
3174 &virge->svga);
3176 io_sethandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3178 virge->pci_regs[4] = 3;
3179 virge->pci_regs[5] = 0;
3180 virge->pci_regs[6] = 0;
3181 virge->pci_regs[7] = 2;
3182 virge->pci_regs[0x32] = 0x0c;
3183 virge->pci_regs[0x3d] = 1;
3184 virge->pci_regs[0x3e] = 4;
3185 virge->pci_regs[0x3f] = 0xff;
3187 virge->virge_id_high = 0x56;
3188 virge->virge_id_low = 0x31;
3189 virge->virge_rev = 0;
3190 virge->virge_id = 0xe1;
3192 switch (virge->memory_size)
3193 {
3194 case 2:
3195 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (4 << 5);
3196 break;
3197 case 4:
3198 default:
3199 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (0 << 5);
3200 break;
3201 }
3203 virge->svga.crtc[0x37] = 1;// | (7 << 5);
3204 virge->svga.crtc[0x53] = 1 << 3;
3205 virge->svga.crtc[0x59] = 0x70;
3207 virge->is_375 = 0;
3209 pci_add(s3_virge_pci_read, s3_virge_pci_write, virge);
3211 virge->wake_render_thread = thread_create_event();
3212 virge->wake_main_thread = thread_create_event();
3213 virge->not_full_event = thread_create_event();
3214 virge->render_thread = thread_create(render_thread, virge);
3216 return virge;
3217 }
3219 static void *s3_virge_375_init()
3220 {
3221 virge_t *virge = malloc(sizeof(virge_t));
3222 memset(virge, 0, sizeof(virge_t));
3224 virge->bilinear_enabled = device_get_config_int("bilinear");
3225 virge->dithering_enabled = device_get_config_int("dithering");
3226 virge->memory_size = device_get_config_int("memory");
3228 svga_init(&virge->svga, virge, virge->memory_size << 20,
3229 s3_virge_recalctimings,
3230 s3_virge_in, s3_virge_out,
3231 s3_virge_hwcursor_draw,
3232 s3_virge_overlay_draw);
3234 rom_init(&virge->bios_rom, "roms/86c375_1.bin", 0xc0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL);
3235 if (PCI)
3236 mem_mapping_disable(&virge->bios_rom.mapping);
3238 mem_mapping_add(&virge->mmio_mapping, 0, 0, s3_virge_mmio_read,
3239 s3_virge_mmio_read_w,
3240 s3_virge_mmio_read_l,
3241 s3_virge_mmio_write,
3242 s3_virge_mmio_write_w,
3243 s3_virge_mmio_write_l,
3244 NULL,
3245 0,
3246 virge);
3247 mem_mapping_add(&virge->new_mmio_mapping, 0, 0, s3_virge_mmio_read,
3248 s3_virge_mmio_read_w,
3249 s3_virge_mmio_read_l,
3250 s3_virge_mmio_write,
3251 s3_virge_mmio_write_w,
3252 s3_virge_mmio_write_l,
3253 NULL,
3254 0,
3255 virge);
3256 mem_mapping_add(&virge->linear_mapping, 0, 0, svga_read_linear,
3257 svga_readw_linear,
3258 svga_readl_linear,
3259 svga_write_linear,
3260 svga_writew_linear,
3261 svga_writel_linear,
3262 NULL,
3263 0,
3264 &virge->svga);
3266 io_sethandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3268 virge->pci_regs[4] = 3;
3269 virge->pci_regs[5] = 0;
3270 virge->pci_regs[6] = 0;
3271 virge->pci_regs[7] = 2;
3272 virge->pci_regs[0x32] = 0x0c;
3273 virge->pci_regs[0x3d] = 1;
3274 virge->pci_regs[0x3e] = 4;
3275 virge->pci_regs[0x3f] = 0xff;
3277 virge->virge_id_high = 0x8a;
3278 virge->virge_id_low = 0x01;
3279 virge->virge_rev = 0;
3280 virge->virge_id = 0xe1;
3282 switch (virge->memory_size)
3283 {
3284 case 2:
3285 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (4 << 5);
3286 break;
3287 case 4:
3288 default:
3289 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (0 << 5);
3290 break;
3291 }
3292 // virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4);
3293 virge->svga.crtc[0x37] = 1;// | (7 << 5);
3294 virge->svga.crtc[0x53] = 1 << 3;
3295 virge->svga.crtc[0x59] = 0x70;
3297 virge->svga.crtc[0x6c] = 0x01;
3299 virge->is_375 = 1;
3301 pci_add(s3_virge_pci_read, s3_virge_pci_write, virge);
3303 virge->wake_render_thread = thread_create_event();
3304 virge->wake_main_thread = thread_create_event();
3305 virge->not_full_event = thread_create_event();
3306 virge->render_thread = thread_create(render_thread, virge);
3308 return virge;
3309 }
3311 static void s3_virge_close(void *p)
3312 {
3313 virge_t *virge = (virge_t *)p;
3314 FILE *f = fopen("vram.dmp", "wb");
3315 fwrite(virge->svga.vram, 4 << 20, 1, f);
3316 fclose(f);
3318 thread_kill(virge->render_thread);
3319 thread_destroy_event(virge->not_full_event);
3320 thread_destroy_event(virge->wake_main_thread);
3321 thread_destroy_event(virge->wake_render_thread);
3323 svga_close(&virge->svga);
3325 free(virge);
3326 }
/*Availability check for the ViRGE device: returns the result of
  rom_present() for its BIOS image, "roms/s3virge.bin".*/
static int s3_virge_available()
{
        int found = rom_present("roms/s3virge.bin");

        return found;
}
/*Availability check for the ViRGE/DX device: returns the result of
  rom_present() for its BIOS image, "roms/86c375_1.bin".*/
static int s3_virge_375_available()
{
        int found = rom_present("roms/86c375_1.bin");

        return found;
}
3338 static void s3_virge_speed_changed(void *p)
3339 {
3340 virge_t *virge = (virge_t *)p;
3342 svga_recalctimings(&virge->svga);
3343 }
3345 static void s3_virge_force_redraw(void *p)
3346 {
3347 virge_t *virge = (virge_t *)p;
3349 virge->svga.fullchange = changeframecount;
3350 }
3352 static void s3_virge_add_status_info(char *s, int max_len, void *p)
3353 {
3354 virge_t *virge = (virge_t *)p;
3355 char temps[256];
3356 uint64_t new_time = timer_read();
3357 uint64_t status_diff = new_time - status_time;
3358 status_time = new_time;
3360 if (!status_diff)
3361 status_diff = 1;
3363 svga_add_status_info(s, max_len, &virge->svga);
3364 sprintf(temps, "%f Mpixels/sec\n%f ktris/sec\n%f%% CPU\n%f%% CPU (real)\n%d writes %i reads\n\n", (double)virge->pixel_count/1000000.0, (double)virge->tri_count/1000.0, ((double)virge_time * 100.0) / timer_freq, ((double)virge_time * 100.0) / status_diff, reg_writes, reg_reads);
3365 strncat(s, temps, max_len);
3367 virge->pixel_count = virge->tri_count = 0;
3368 virge_time = 0;
3369 reg_reads = 0;
3370 reg_writes = 0;
3371 }
3373 static device_config_t s3_virge_config[] =
3374 {
3375 {
3376 .name = "memory",
3377 .description = "Memory size",
3378 .type = CONFIG_SELECTION,
3379 .selection =
3380 {
3381 {
3382 .description = "2 MB",
3383 .value = 2
3384 },
3385 {
3386 .description = "4 MB",
3387 .value = 4
3388 },
3389 {
3390 .description = ""
3391 }
3392 },
3393 .default_int = 4
3394 },
3395 {
3396 .name = "bilinear",
3397 .description = "Bilinear filtering",
3398 .type = CONFIG_BINARY,
3399 .default_int = 1
3400 },
3401 {
3402 .name = "dithering",
3403 .description = "Dithering",
3404 .type = CONFIG_BINARY,
3405 .default_int = 1
3406 },
3407 {
3408 .type = -1
3409 }
3410 };
/*Device definition for the ViRGE-based Diamond Stealth 3D 2000.
  The initialiser is positional; the per-entry notes below are inferred from
  the function names - confirm the field order against device_t in device.h.*/
device_t s3_virge_device =
{
        "Diamond Stealth 3D 2000 (S3 ViRGE)",   /*display name*/
        DEVICE_NOT_WORKING,                     /*flags*/
        s3_virge_init,                          /*create an instance*/
        s3_virge_close,                         /*destroy an instance*/
        s3_virge_available,                     /*is the BIOS ROM present?*/
        s3_virge_speed_changed,                 /*recalculate timings*/
        s3_virge_force_redraw,                  /*force a full redraw*/
        s3_virge_add_status_info,               /*append status text*/
        s3_virge_config                         /*configuration options*/
};
/*Device definition for the S3 ViRGE/DX. It differs from s3_virge_device only
  in its name, init function and availability check.
  The initialiser is positional; the per-entry notes below are inferred from
  the function names - confirm the field order against device_t in device.h.*/
device_t s3_virge_375_device =
{
        "S3 ViRGE/DX",                          /*display name*/
        DEVICE_NOT_WORKING,                     /*flags*/
        s3_virge_375_init,                      /*create an instance*/
        s3_virge_close,                         /*destroy an instance*/
        s3_virge_375_available,                 /*is the BIOS ROM present?*/
        s3_virge_speed_changed,                 /*recalculate timings*/
        s3_virge_force_redraw,                  /*force a full redraw*/
        s3_virge_add_status_info,               /*append status text*/
        s3_virge_config                         /*configuration options*/
};
