/*
    Lieves!Tuore presents after a short leave of 10 years: Atsaas
    Marq - main code
    Yzi - sound code, music, add. code
    Man - gfx
*/

#include "msxlib.h"
#include <stdlib.h>
#include <string.h>
#include <interrupt.h>
#include <ioport.h>
#include "ltplay3.h"

// Size of one work bitmap in pixels (1 bit per pixel, rows stored linearly)
#define BITX 80
#define BITY 100
// Bytes per bitmap row
#define BITSX (BITX/8)
// Bytes in one complete BITX x BITY bitmap
#define BITSIZE (BITX/8*BITY)

// Frame counter, incremented once per call of the interrupt handler my_isr()
int cunter=0;

// Graphics data defined outside this file
extern char spitex4[],spitex6[],spitex[],spitex5[],
            logobw[],logob[],logohede[],logowam[],logogreet[],
            logorno[],logozen[];
// Sprite pattern data used by fadeoverlay(); set by spriteput()
char *cursprite=0;

// These will be devastated by the gfx loader
// bitmap: room for 8 bitmaps of BITSIZE bytes (circles_init()/tunnel_init()
//         fill all eight); also the load target of readfile()
// sqrtt:  result cache of sqrt(); an entry of 255 means "not computed yet"
unsigned char bitmap[BITSIZE*8],
              sqrtt[2624];
// Per-pixel distance cache for one quadrant, used by tunnel_init()
static short dist[BITX/2*BITY/2];

// Sprite flight paths (12 sprites, 2 coords)
// NOTE(review): neither STEPS nor flight is used in the part of the file
// visible here; flight simply aliases the bitmap work buffer.
#define STEPS 128
#define flight bitmap

// Sine table: 256 entries for one full period, amplitude -127..127.
// Indexed with an angle masked to 0..255 (see blind(), tunnel(), balkkar()).
signed char sini[256]={
0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57,59,62,65,67,70,73,75,
78,80,82,85,87,89,91,94,96,98,100,102,103,105,107,108,110,112,113,114,116,117,
118,119,120,121,122,123,123,124,125,125,126,126,126,126,126,127,126,126,126,
126,126,125,125,124,123,123,122,121,120,119,118,117,116,114,113,112,110,108,
107,105,103,102,100,98,96,94,91,89,87,85,82,80,78,75,73,70,67,65,62,59,57,54,
51,48,45,42,39,36,33,30,27,24,21,18,15,12,9,6,3,0,-3,-6,-9,-12,-15,-18,-21,
-24,-27,-30,-33,-36,-39,-42,-45,-48,-51,-54,-57,-59,-62,-65,-67,-70,-73,-75,
-78,-80,-82,-85,-87,-89,-91,-94,-96,-98,-100,-102,-103,-105,-107,-108,-110,
-112,-113,-114,-116,-117,-118,-119,-120,-121,-122,-123,-123,-124,-125,-125,
-126,-126,-126,-126,-126,-127,-126,-126,-126,-126,-126,-125,-125,-124,-123,
-123,-122,-121,-120,-119,-118,-117,-116,-114,-113,-112,-110,-108,-107,-105,
-103,-102,-100,-98,-96,-94,-91,-89,-87,-85,-82,-80,-78,-75,-73,-70,-67,-65,
-62,-59,-57,-54,-51,-48,-45,-42,-39,-36,-33,-30,-27,-24,-21,-18,-15,-12,-9,
-6,-3};

// Blank the display: program VDP mode register 1 with every flag the demo
// uses except MODE1_BLANK.
// NOTE(review): presumably MODE1_BLANK set means "display enabled", as on
// the TMS9918 - confirm against msxlib.h.
void vdp_blank(void)
{
    vdp_register(VDP_MODE1,
                 MODE1_IE
                 +MODE1_VRAM
                 +MODE1_SPRITE_SZ
                 +MODE1_SPRITE_MAG);
}

// Show the display again: same mode register 1 value as vdp_blank() plus
// the MODE1_BLANK flag.
void vdp_unblank(void)
{
    vdp_register(VDP_MODE1,
                 MODE1_IE
                 +MODE1_VRAM
                 +MODE1_BLANK
                 +MODE1_SPRITE_SZ
                 +MODE1_SPRITE_MAG);
}

// Integer square root with a lookup cache (bit-by-bit algorithm borrowed
// from "Multimedia Guru").
//
// a:       value to take the root of
// returns: floor(sqrt(a)) for non-negative a, 0 for negative a
//
// Results are cached in sqrtt[]; an entry of 255 means "not computed yet",
// so the table is expected to be filled with 255 before first use (that
// initialisation is not in this function). Arguments outside the table are
// still computed, but bypass the cache instead of indexing past its end.
unsigned char sqrt(int a){
    int ret=0;
    int s;
    int ret_sq=-a-1;
    char cacheable=(a>=0 && a<(int)sizeof(sqrtt));

    if(cacheable && sqrtt[a]!=255)
        return(sqrtt[a]);

    // One result bit per round, most significant first: ret_sq holds
    // ret*ret-a-1, and the next bit is accepted while that stays negative.
    for(s=14; s>=0; s-=2){
        int b;
        ret+= ret;
        b=ret_sq + ((2*ret+1)<<s);
        if(b<0){
            ret_sq=b;
            ret++;
        }
    }

    if(cacheable)
        sqrtt[a]=ret;
    return ret;
}

// Draw one line of sprite overlay
//
// addy: VRAM address of the first byte to write
// data: source address of the first byte
//
// Copies 8 bytes to VRAM. For every byte the VRAM write address is set
// again through port 0x99 (low byte, then high byte with 0x40 or'ed in) and
// one byte is sent to port 0x98 with OUTI; afterwards both the VRAM address
// and the source pointer have moved on by 16 (the pointer by OUTI's +1
// plus 15).
// B starts at 8*2 because OUTI and DJNZ each decrement it, which gives 8
// rounds. HL, BC and DE are saved and restored; A is not.
// NOTE(review): the 4(ix)..7(ix) offsets presumably rely on the frame
// pointer set up by the compiler's own function prologue - confirm.
void putline(int addy,char *data)
{
    addy;data;
    _asm
    push    hl
    push    bc
    push    de

    ld      e,4(ix)
    ld      d,5(ix)
    ld      l,6(ix)
    ld      h,7(ix)

    ld      b,#8*2
    ld      c,#0x98

$41:
    ld      a,e
    out     (0x99),a
    ld      a,d
    nop
    nop
    or      #0x40
    out     (0x99),a

    push    hl
    ld      hl,#16
    add     hl,de
    push    hl
    pop     de
    pop     hl

    outi
    push    de
    ld      de,#15
    add     hl,de
    pop     de

    djnz    $41

    pop     de
    pop     bc
    pop     hl
    _endasm;
}

// Clear one line of sprite overlay
//
// addy: VRAM address of the first byte to clear
//
// Writes a zero byte to 8 VRAM addresses spaced 16 apart, i.e. the same
// addresses putline() fills. The write address is set through port 0x99
// before every byte (high byte with 0x40 or'ed in) and the zero goes to
// port 0x98.
// BC and DE are saved and restored, HL is preserved by the push/pop pair
// inside the loop; A is not preserved.
void clearline(int addy)
{
    addy;
    _asm
    push    bc
    push    de

    ld      e,4(ix)
    ld      d,5(ix)

    ld      b,#8

$42:
    ld      a,e
    out     (0x99),a
    ld      a,d
    nop
    nop
    or      #0x40
    out     (0x99),a

    push    hl
    ld      hl,#16
    add     hl,de
    push    hl
    pop     de
    pop     hl

    sub     a
    out     (0x98),a
    djnz    $42

    pop     de
    pop     bc
    _endasm;
}

#define FADEIN 1
#define FADEOUT 2

// Starts a fade of the sprite overlay, or advances a running one.
//
// init != 0: remember the direction (FADEIN draws the lines, any other
//            value clears them), restart at line 0 and return.
// init == 0: process one step - one line counted from the top and its
//            mirror line counted from the bottom (95-line). The line
//            counter advances by 2 and the fade stops once it reaches 96.
//            Nothing happens when no fade is active.
//
// A line number is turned into a pattern offset as (line/16)*128 + line%16.
void fadeoverlay(int init)
{
    static int line=0;
    static char mode=0;
    int mirror,top,bottom;

    if(init!=0)
    {
        line=0;
        mode=init;
        return;
    }

    if(mode==0)
        return;

    mirror=95-line;
    top=((line>>4)<<7) + (line&15);
    bottom=((mirror>>4)<<7) + (mirror&15);

    if(mode==FADEIN)
    {
        putline(0x3800+top,&cursprite[top]);
        putline(0x3800+bottom,&cursprite[bottom]);
    }
    else
    {
        clearline(0x3800+top);
        clearline(0x3800+bottom);
    }

    line+=2;
    if(line==96)
        mode=0;
}

// Source buffer of setsprites(): two bytes per sprite, sent in the order
// they are stored
unsigned char spritepos[32*2];

// Set num sprite positions starting from "first"
//
// first: index of the first 4-byte entry to update in the table at VRAM
//        0x1b00
// num:   number of entries (only the low byte of the argument is read)
//
// For each entry the VRAM write address is set to 0x1b00 + 4*entry and two
// consecutive bytes from spritepos[] are sent to port 0x98; the remaining
// two bytes of the entry are left alone. Data is always read from the start
// of spritepos[], regardless of "first".
// B is loaded with num*3 because every round decrements it three times
// (two OUTI plus DJNZ). DE, BC and HL are saved and restored; A is not.
void setsprites(int first,int num)
{
    first; num;
    _asm
    push    de
    push    bc
    push    hl

    ld      de,#0x1b00
    ld      l,4(ix)
    ld      h,5(ix)
    add     hl,hl
    add     hl,hl
    add     hl,de
    push    hl
    pop     de
    ld      c,#0x98
    ld      b,6(ix)
    ld      a,b
    add     b
    add     b
    ld      b,a
    ld      hl,#_spritepos

$30:
    ld      a,e
    out     (0x99),a
    ld      a,d
    nop
    nop
    or      #0x40
    out     (0x99),a
    nop
    nop
    nop
    
    outi
    inc     de
    inc     de
    inc     de
    outi
    nop
    inc     de
    djnz    $30

    pop     hl
    pop     bc
    pop     de
    _endasm;
}

// Just a faster version of setsprites(): always updates the first 12
// entries of the table at VRAM 0x1b00 and takes the source buffer as an
// argument.
//
// data: 24 bytes, two per sprite, sent in the order they are stored
//
// For each entry the VRAM write address is set to 0x1b00 + 4*entry and two
// bytes are sent to port 0x98. B is loaded with 12*3 because every round
// decrements it three times (two OUTI plus DJNZ).
//
// Fix: the loop used to end with "djnz $30", which jumped into the loop of
// setsprites() (with its extra nops), so only the first round ran the fast
// code here. It now branches to its own label $39.
// NOTE(review): the loop now really runs with the shorter delays written
// below - verify the VDP access timing on real hardware.
void setsprites12(char *data)
{
    data;
    _asm
    push    de
    push    bc
    push    hl

    ld      de,#0x1b00
    ld      hl,#0
    add     hl,de
    push    hl
    pop     de
    ld      c,#0x98
    ld      b,#12*3
    ld      l,4(ix)
    ld      h,5(ix)

$39:
    ld      a,e
    out     (0x99),a
    ld      a,d
    nop
    or      #0x40
    out     (0x99),a
    nop
    
    outi
    inc     de
    inc     de
    inc     de
    outi
    inc     de
    djnz    $39

    pop     hl
    pop     bc
    pop     de
    _endasm;
}

// Set pixel (x,y) in the first BITSX-bytes-per-row bitmap of bitmap[];
// bit 7 of a byte is the leftmost pixel. The expansion already ends in a
// semicolon, so do not use it as the sole body of an if that has an else.
#define pixon(x,y) bitmap[((x)>>3)+(y)*BITSX]|=0x80>>((x)&7);

/* Copy a linear bitmap to the pattern table */
// bmap: address of the top-left byte of the window to copy, inside a
//       bitmap that is 10 bytes (80 pixels) wide
//
// Sends a window of 10 x 8 character cells (80 x 64 pixels, 640 bytes) to
// port 0x98, cell by cell: for each of the 8 cell rows and each of the 10
// byte columns, the 8 bytes bmap[col], bmap[col+10] .. bmap[col+70] are
// written one after the other. After 10 columns the source pointer is
// moved 70 further, i.e. to the start of the next 8 pixel rows.
// The VRAM write address is not set here; the callers use vdp_address()
// first. "scf / ccf" clears the carry so that ADC HL,DE acts as a plain
// addition.
// The extra "push ix / ld ix,#0 / add ix,sp" is why the argument is read
// from 6(ix) here instead of 4(ix). IY is saved and restored but not used.
void kopsaa(unsigned char *bmap)
{
    bmap;
    _asm
    push    ix
    ld      ix,#0
    add     ix,sp
    push    iy
    push    bc
    push    de
    push    hl

    ld      l,6(ix)
    ld      h,7(ix)
    ld      de,#10
    ld      c,#8

$0: ld      b,#10

$1: scf
    ccf
    push    hl
    ld      a,(hl)
    adc     hl,de
    out     (0x98),a
    nop
    ld      a,(hl)
    adc     hl,de
    out     (0x98),a
    nop
    ld      a,(hl)
    adc     hl,de
    out     (0x98),a
    nop
    ld      a,(hl)
    adc     hl,de
    out     (0x98),a

    ld      a,(hl)
    adc     hl,de
    out     (0x98),a
    nop
    ld      a,(hl)
    adc     hl,de
    out     (0x98),a
    nop
    ld      a,(hl)
    adc     hl,de
    out     (0x98),a
    nop
    ld      a,(hl)
    adc     hl,de
    out     (0x98),a
    nop

    pop     hl
    inc     hl
    djnz    $1

    push    de
    scf
    ccf
    ld      de,#7*10
    adc     hl,de
    pop     de
    dec     c
    jp      nz,$0

    pop     hl
    pop     de
    pop     bc
    pop     iy
    pop     ix
    _endasm;
}

// Precalculates the graphics of the Circles effect into bitmap[].
//
// Bitmap 0 gets concentric rings around the centre: only the top-left
// quadrant is evaluated and each hit is mirrored into the other three.
// A pixel is set when its distance from the centre, divided by 16, is even.
// Bitmaps 1..7 are then derived one from another, each being the previous
// one shifted left by one pixel.
void circles_init(void)
{
    int px,py,dx,dy2,d2,band,plane,idx;
    unsigned char *src,*dst;
    unsigned char carry;

    /* Rings */
    memset(bitmap,0,BITSIZE);
    for(py=0;py<BITY/2;py++)
    {
        dy2=(py-BITY/2)*(py-BITY/2);
        for(px=0;px<BITX/2;px++)
        {
            dx=px-BITX/2;
            d2=dx*dx+dy2;
            if(d2<2624) // Stay inside the sqrt() table
            {
                band=sqrt(d2)>>4;
                if(band<4 && (band&1)==0)
                {
                    pixon(px,py);
                    pixon(BITX-px-1,py);
                    pixon(px,BITY-py-1);
                    pixon(BITX-px-1,BITY-py-1);
                }
            }
        }
    }

    /* Shifted versions: walk each bitmap from its last byte to its first so
       that the top bit of a byte carries into the byte on its left */
    for(plane=0;plane<7;plane++)
    {
        src=&bitmap[plane*BITSIZE];
        dst=src+BITSIZE;
        carry=0;
        for(idx=BITSIZE-1;idx>=0;idx--)
        {
            dst[idx]=(src[idx]<<1)+(carry ? 1 : 0);
            carry=src[idx]&0x80;
        }
    }
}


// vdp_quadpoke does four consecutive "vpokes" of the same color
//
// destvdpaddr: VRAM address of the first byte
// color:       byte value that is written four times
//
// The write address is sent to port 0x99 once (low byte, then high byte
// with 0x40 or'ed in); the four data bytes then go to port 0x98 with three
// nops between them. The caller relies on the four bytes landing at
// consecutive addresses.
// Registers A and C are modified and nothing is saved or restored here.
void vdp_quadpoke(unsigned int destvdpaddr, char color)
{
    destvdpaddr;color;
    _asm

    ld  c,4(ix)
    ld  a,c
    out (0x99),a
    ld  c,5(ix)
    ld  a,c
    or  a,#0x40
    ld  c,a
    out (0x99),a
    ld  a,6(ix)
    out (0x98),a
    nop
    nop
    nop
    out (0x98),a
    nop
    nop
    nop
    out (0x98),a
    nop
    nop
    nop
    out (0x98),a
    
    _endasm;
}

// Number of pixel rows blanked per frame by the fade-out of circles() and
// tunnel(); vdp_quadpoke() writes exactly four bytes, so this has to be 4.
#define FADE_SPEED 4


// The Circles effect; call once per frame after circles_init().
//
// fade_out: when non-zero, FADE_SPEED more pixel rows of the colour table
//           are set to 0 on every call, until all 192 rows are done.
//
// The first call also builds the mosaic name table (pattern numbers 0..79,
// matching the 10 x 8 cells kopsaa() uploads) and the red gradient in the
// colour table. Every call copies a window of one of the pre-shifted
// bitmaps to the pattern table: x moves back and forth between 40 and 120,
// y follows a parabola ("bounce").
//
// Fix: the parabola gives y = -4..-1 for tmp in {0,2,4,6,250,252,254}.
// A negative y made kopsaa() read rows from the end of the previous shifted
// bitmap, or from before bitmap[] altogether when x is a multiple of 8.
// y is now clamped to 0.
void circles(char fade_out)
{
    static int first=1,
               x,xk,y,yk,tmp,
               fade_scanline,
               fade_offset;
    static int n,i,off,tmp2;
    static unsigned char c;

    if(first)
    {
        first=0;
        for(y=n=0;y<24;y++) // Mosaic nametable
        {
            yk=(((y+2)&3)+(y+2)/4-1)*10;
            for(x=0;x<32;x++,n++)
            {
                c=((x+2)&3)+((x+2)>>2)-1 + yk;
                DI;
                vdp_poke(0x1800+n,c);
                EI;
            }
        }

        off=0x2000;
        for(i=0;i<5;i++) // Gradient: five bands of 2*10*8 colour bytes
        {
            switch(i)
            {
                case 0: tmp=DARK_RED; tmp2=DARK_RED; break;
                case 1: tmp=DARK_RED; tmp2=RED; break;
                case 2: tmp=RED; tmp2=RED; break;
                case 3: tmp=RED; tmp2=LIGHT_RED; break;
                case 4: tmp=LIGHT_RED; tmp2=LIGHT_RED; break;
            }
            for(n=0;n<2*10*8;n++,off++)
            {
                // Same colour byte for all three thirds of the screen
                DI;
                if(n&1)
                {
                    vdp_poke(off,(tmp<<4)+WHITE);
                    vdp_poke(off+256*8,(tmp<<4)+WHITE);
                    vdp_poke(off+256*8*2,(tmp<<4)+WHITE);
                }
                else
                {
                    vdp_poke(off,(tmp2<<4)+GRAY);
                    vdp_poke(off+256*8,(tmp2<<4)+GRAY);
                    vdp_poke(off+256*8*2,(tmp2<<4)+GRAY);
                }
                EI;
            }
        }

        x=40;
        xk=1;
        tmp=0;

        fade_scanline = 0;
    } // if first

    y=28-((tmp-128)*(tmp-128)>>9); // Bounce
    if(y<0) // Never start the copy above the top row of the bitmap
        y=0;
    x+=xk;
    if(x==40 || x==120)
        xk=-xk;

    vdp_address(0);
    // x&7 selects the pre-shifted bitmap, x/8 and y the window inside it
    kopsaa(&bitmap[x/8+(x&7)*BITSIZE+y*BITSX]);

    if (fade_out)
    {
        if (fade_scanline < 192)
        {
            if (fade_scanline == 0) fade_offset = 0x2000;
            for (n=0; n<10; n++, fade_offset += 8) // 10 characters per row
                vdp_quadpoke(fade_offset, 0); // for other values of FADE_SPEED you need a different vdp_Xpoke version...
            fade_scanline += FADE_SPEED;
            if ((fade_scanline & 63) == 0)
            // next third of the screen
                fade_offset += -(8 - FADE_SPEED) + -80*8 + 256*8;
            else if ((fade_scanline & 7) == 0)
            // next character row
                fade_offset += -(8 - FADE_SPEED);
            else
            // lower half of the same character row
                fade_offset += -(80 - FADE_SPEED);
        }
    }

    tmp=(tmp+6)&255;
}

// Duplicate the same byte to a row
//
// buf: 32 source bytes
//
// Sends every source byte 8 times in a row to port 0x98, 256 bytes in
// total, with no delay between the writes. The VRAM write address must have
// been set by the caller. BC and HL are saved and restored; A is not.
void duplicate8(unsigned char *buf)
{
    buf;
    _asm
    push    bc
    push    hl

    ld      l,4(ix)
    ld      h,5(ix)
    ld      b,#32

$3: ld      a,(hl)
    out     (0x98),a
    out     (0x98),a
    out     (0x98),a
    out     (0x98),a
    out     (0x98),a
    out     (0x98),a
    out     (0x98),a
    out     (0x98),a
    inc     hl
    djnz    $3

    pop     hl
    pop     bc
    _endasm;
}

// Every second byte of a row different
//
// buf: 8 byte pairs (16 bytes)
//
// For each pair (first, second) the sequence first,second is sent to port
// 0x98 four times per round of the inner loop and the inner loop runs 8
// times, i.e. 64 bytes per pair and 512 bytes in total. The VRAM write
// address must have been set by the caller.
// BC and HL are saved and restored; A, D and E are modified and not
// restored.
void duplicate2(unsigned char *buf)
{
    buf;
    _asm
    push    bc
    push    hl

    ld      l,4(ix)
    ld      h,5(ix)
    ld      c,#0x98
    ld      e,#8

$4: ld      b,#8
    ld      a,(hl)
    inc     hl
    ld      d,(hl)
    inc     hl

$5: out     (0x98),a
    nop
    out     (c),d
    nop
    out     (0x98),a
    nop
    out     (c),d
    nop
    out     (0x98),a
    nop
    out     (c),d
    nop
    out     (0x98),a
    nop
    out     (c),d
    djnz    $5

    dec     e
    jp      nz,$4

    pop     hl
    pop     bc
    _endasm;
}

// Every second byte of a row different, just slower :(
//
// buf: 8 byte pairs (16 bytes)
//
// Sends the same 512 bytes as duplicate2(), but with three nops instead of
// one between the writes to port 0x98. blind() uses this version for the
// second and third colour table blocks.
// BC and HL are saved and restored; A, D and E are modified and not
// restored.
void duplicate2slow(unsigned char *buf)
{
    buf;
    _asm
    push    bc
    push    hl

    ld      l,4(ix)
    ld      h,5(ix)
    ld      c,#0x98
    ld      e,#8

$34:
    ld      b,#8
    ld      a,(hl)
    inc     hl
    ld      d,(hl)
    inc     hl

$35:
    out     (0x98),a
    nop
    nop
    nop
    out     (c),d
    nop
    nop
    nop
    out     (0x98),a
    nop
    nop
    nop
    out     (c),d
    nop
    nop
    nop
    out     (0x98),a
    nop
    nop
    nop
    out     (c),d
    nop
    nop
    nop
    out     (0x98),a
    nop
    nop
    nop
    out     (c),d
    nop
    djnz    $35

    dec     e
    jp      nz,$34

    pop     hl
    pop     bc
    _endasm;
}

// Set pixel x in the one-row buffer rivi[]; bit 7 of a byte is the leftmost
// pixel. The expansion already ends in a semicolon.
#define pixon2(x) rivi[(x)>>3]|=0x80>>((x)&7);
// Buffers of the Blind effect:
//   rivi  - current pattern row, 4 bytes per blind, sent by duplicate8()
//   varit - current colours, 2 bytes per blind, sent by duplicate2()
//   prek  - precalculated 4-byte pattern rows for the widths 0..32
//   savy  - precalculated colour pairs for the widths 0..32
unsigned char rivi[32],varit[32*2],
              prek[33*4],savy[33*2];

// Precalculates the tables of the Blind effect.
//
// For every width 0..32, prek[] receives a 32-pixel row holding a centred
// bar of that many pixels, and savy[] receives the two colour bytes that go
// with it (the background moves from dark red to light red in steps of 7
// widths, the foreground is always white). The work buffers rivi[] and
// varit[] are cleared at the end.
void blind_init(void)
{
    // Background colours of the first and second colour byte, per width/7
    static const unsigned char first_bg[5]={DARK_RED,DARK_RED,RED,RED,LIGHT_RED};
    static const unsigned char second_bg[5]={DARK_RED,RED,RED,LIGHT_RED,LIGHT_RED};
    int width,left,px,band;

    for(width=0;width<=32;width++)
    {
        // Bar of "width" pixels centred on pixel 16
        memset(rivi,0,4);
        left=16-width/2;
        for(px=left;px<left+width;px++)
        {
            pixon2(px);
        }
        memcpy(prek+width*4,rivi,4);

        band=width/7;
        savy[width*2]=first_bg[band]*16+WHITE;
        savy[width*2+1]=second_bg[band]*16+WHITE;
    }

    memset(rivi,0,32);
    memset(varit,0,32*2);
}

// Simply copy four bytes since memcpy() sucks
//
// dst: destination of the four bytes
// src: source of the four bytes
//
// Four LDI instructions copy src[0..3] to dst[0..3]. Only HL is saved and
// restored; DE and BC are modified by LDI and not restored.
void copy4(unsigned char *dst,unsigned char *src)
{
    dst;src;
    _asm
    push    hl

    ld      e,4(ix)
    ld      d,5(ix)
    ld      l,6(ix)
    ld      h,7(ix)

    ldi
    ldi
    ldi
    ldi

    pop     hl
    _endasm;
}

// The Blind effect
void blind(void)
{
    static int first=1,phase=96,yplus=0;
    int n,tmp,lev2;
    signed short lev;
    unsigned char *varip,*rivip;
    unsigned char phase2;

    if(first) // Stripes onscreen
    {
        first=0;
        for(n=0;n<32*24;n++)
        {
            DI;
            vdp_poke(0x1800+n,n&31);
            EI;
        }
    }

    vdp_address(0);
    duplicate8(rivi);
    vdp_address(0x2000);
    duplicate2(varit);
    vdp_address(0x2000+32*8*8);
    duplicate2slow(varit);
    vdp_address(0x2000+32*8*8*2);
    duplicate2slow(varit);

    /* Unused music sync:
        if (music_channel_0_level > 0xF0)
		yplus = 24;
	else
		if (yplus > 8) yplus--;*/

    tmp=yplus+(sini[phase]>>4);
    varip=varit;
    rivip=rivi;
    phase2=phase;
    phase2+=phase2;

    for(n=0;n<8;n++,rivip+=4,phase2+=16) // Go through the blinds
    {
        lev=tmp+(sini[phase2]>>3); // Size 0..32
        if(lev<=0)
            lev=0;
        if(lev>32)
            lev=32;
        lev2=lev+lev;

        copy4(rivip,&prek[lev2+lev2]);
        *varip++=savy[lev2];
        *varip++=savy[lev2+1];
    }

    if(music_sync_b==0x24) // fade-out starts from music sync point B=0x24
    {
        if (yplus > 0) yplus-=cunter&1;
    }
    else
        if (yplus < 16) yplus += (cunter&1); // fade-in

    phase=(phase+1)&0xff;
} // blind

// Set pixel (x,y) in the BITSX-bytes-per-row bitmap that the local pointer
// pd points to. The expansion already ends in a semicolon.
#define pixonp(x,y) pd[((x)>>3)+(y)*BITSX]|=0x80>>((x)&7);

// Precalculates the 8 animation phases of the Tunnel effect into bitmap[].
//
// Every phase is a set of concentric rings around the centre; phase n adds
// n to the distance before it is divided by 4, which makes the rings move.
// Only the top-left quadrant is evaluated and each hit is mirrored into the
// other three. The distance of every quadrant pixel is kept in dist[] so
// that sqrt() is needed during the first phase only (-1 = not computed).
//
// Fix: dist[] used to be initialised with a byte count equal to its number
// of elements, which covers only half of the array when short is 16 bits
// wide. sizeof(dist) is correct for either width.
void tunnel_init(void)
{
    int x,y,yk,xk,
        tmp,n,i,c;
    unsigned char *pd;

    /* Rings */
    memset(bitmap,0,BITSIZE*8);
    memset(dist,-1,sizeof(dist));

    for(n=0;n<8;n++) // 8 different phases
    {
        pd=&bitmap[n*BITSIZE];
        for(y=i=0;y<BITY/2;y++)
        {
            yk=y-BITY/2;
            yk*=yk;
            for(x=0;x<BITX/2;x++,i++)
            {
                if(dist[i]==-1)
                {
                    xk=x-BITX/2;
                    tmp=xk*xk+yk;
                    if(tmp>=2624) // Outside the sqrt() table: leave empty
                        continue;
                    c=sqrt(tmp);
                    dist[i]=c;
                }
                else
                    c=dist[i];

                c=(c+n)>>2; // Offset by n for movement

                if(c&1)
                {
                    pixonp(x,y);
                    pixonp(BITX-x-1,y);
                    pixonp(x,BITY-y-1);
                    pixonp(BITX-x-1,BITY-y-1);
                }
            }
        }
    }
} // tunnel_init


// The Tunnel effect; call once per frame after tunnel_init().
//
// run:      when zero, only the one-time screen setup is performed;
//           otherwise one animation frame is shown as well.
// fade_out: when non-zero (and run is non-zero), FADE_SPEED more pixel
//           rows of the colour table are set to 0 on every call, until all
//           192 rows are done.
//
// The one-time setup builds the mosaic name table (pattern numbers 0..79,
// matching the 10 x 8 cells kopsaa() uploads) and a five-band gradient in
// the colour table.
void tunnel(char run, char fade_out)
{
    // Background colours of the five gradient bands for odd / even bytes
    static const unsigned char odd_bg[5]={DARK_RED,RED,RED,DARK_RED,DARK_RED};
    static const unsigned char even_bg[5]={RED,LIGHT_RED,LIGHT_RED,RED,RED};
    static int first=1,
               xk,tmp,
               fade_scanline,
               fade_offset;
    int row,col,cell,rowbase,band,k,addr,odd,even,shift,y;
    unsigned char c;

    if(first)
    {
        first=0;

        cell=0;
        for(row=0;row<24;row++) // Mosaic nametable
        {
            rowbase=(((row+2)&3)+(row+2)/4-1)*10;
            for(col=0;col<32;col++,cell++)
            {
                c=((col+2)&3)+((col+2)>>2)-1 + rowbase;
                DI;
                vdp_poke(0x1800+cell,c);
                EI;
            }
        }

        waitVB();
        addr=0x2000;
        for(band=0;band<5;band++) // Gradient
        {
            odd=(odd_bg[band]<<4)+LIGHT_YELLOW;
            even=(even_bg[band]<<4)+WHITE;
            for(k=0;k<2*10*8;k++,addr++)
            {
                // Same colour byte for all three thirds of the screen
                DI;
                if(k&1)
                {
                    vdp_poke(addr,odd);
                    vdp_poke(addr+256*8,odd);
                    vdp_poke(addr+256*8*2,odd);
                }
                else
                {
                    vdp_poke(addr,even);
                    vdp_poke(addr+256*8,even);
                    vdp_poke(addr+256*8*2,even);
                }
                EI;
            }
        }

        xk=1;
        tmp=0;
        fade_scanline=0;
    } // if first

    if(!run)
        return;

    vdp_address(0);

    // It's that simple. Just copy bitmaps to different y pos.
    shift=7-(xk&7);
    y=16+sini[tmp]/18;
    kopsaa(&bitmap[shift*BITSIZE+y*BITSX]);

    if(fade_out && fade_scanline<192)
    {
        if(fade_scanline==0)
            fade_offset=0x2000;
        for(k=0;k<10;k++,fade_offset+=8) // 10 characters per row
            vdp_quadpoke(fade_offset,0); // for other values of FADE_SPEED you need a different vdp_Xpoke version...
        fade_scanline+=FADE_SPEED;
        if((fade_scanline&63)==0)
            fade_offset+= -(8 - FADE_SPEED) + -80*8 + 256*8; // next third of the screen
        else if((fade_scanline&7)==0)
            fade_offset+= -(8 - FADE_SPEED); // next character row
        else
            fade_offset+= -(80 - FADE_SPEED); // lower half of the same character row
    }

    tmp=(tmp+4)&255;
    xk++;
} // tunnel

// Buffers of the Colorbars effect:
//   skriin - name table buffer, 24 rows of 32+6 bytes; copywmodulo() sends
//            32 bytes of every row
//   vert   - one colour byte per pixel row (192 rows)
unsigned char skriin[(32+6)*24],
              vert[192];

// Clears the y direction buffer fast
//
// Fills all 192 bytes of vert[] with the value 1, four bytes per round of
// the loop. HL and BC are saved and restored; A is not.
void clry(void)
{
    _asm
    push    hl
    push    bc

    ld      hl,#_vert
    ld      a,#1
    ld      b,#192/4

$6: ld      (hl),a
    inc     hl
    ld      (hl),a
    inc     hl
    ld      (hl),a
    inc     hl
    ld      (hl),a
    inc     hl
    djnz    $6

    pop     bc
    pop     hl
    _endasm;
}

// Horiz. bar colors
// One colour per pixel row of a 16-row bar, symmetric around its white
// centre; balkcpy() copies the whole table into vert[].
unsigned char balk[16]={
    DARK_RED,
    RED,
    DARK_RED,
    RED,
    LIGHT_RED,
    RED,
    LIGHT_RED,
    WHITE,

    WHITE,
    LIGHT_RED,
    RED,
    LIGHT_RED,
    RED,
    DARK_RED,
    RED,
    DARK_RED};

// Copy a horiz bar.
//
// dest: destination of the 16 bytes
//
// Copies the 16 bytes of balk[] to dest with 16 unrolled LDI instructions.
// The extra "push ix / ld ix,#0 / add ix,sp" is why the argument is read
// from 6(ix). BC, HL, DE and IX are saved and restored.
void balkcpy(unsigned char *dest)
{
    dest;
    _asm
    push    ix
    ld      ix,#0
    add     ix,sp
    push    bc
    push    hl
    push    de

    ld      e,6(ix)
    ld      d,7(ix)
    ld      hl,#_balk

    ldi
    ldi
    ldi
    ldi
    ldi
    ldi
    ldi
    ldi

    ldi
    ldi
    ldi
    ldi
    ldi
    ldi
    ldi
    ldi

    pop     de
    pop     hl
    pop     bc
    pop     ix
    _endasm;
} // balkcpy

// Copy the nametable with modulo
//
// src: address of the first byte to send, inside a buffer whose rows are
//      32+6 bytes long
//
// Sends 24 rows of 32 bytes to port 0x98 with unrolled OUTI instructions
// (two nops between them) and skips 6 source bytes after every row. The
// VRAM write address must have been set by the caller.
//
// About the row counter: B starts at 24, but each of the 32 OUTI per row
// decrements B as well, so one row lowers B by 33 in total. Since
// 32*24 = 768 is a multiple of 256, B still reaches zero after exactly 24
// rows. Changing the row count or the bytes per row breaks this.
// "scf / ccf" clears the carry so that ADC HL,DE acts as a plain addition.
// BC, HL, DE and IX are saved and restored.
void copywmodulo(unsigned char *src)
{
    src;
    _asm
    push    ix
    ld      ix,#0
    add     ix,sp
    push    bc
    push    hl
    push    de

    ld      l,6(ix)
    ld      h,7(ix)
    ld      b,#24
    ld      c,#0x98
    ld      de,#6

$7: outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop

    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop

    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi
    nop
    nop
    outi

    scf
    ccf
    adc     hl,de
    dec     b
    jp      nz,$7

    pop     de
    pop     hl
    pop     bc
    pop     ix
    _endasm;
} // copywmodulo

// The Colorbars effect; call once per frame.
//
// The first call clears the patterns 0..71, defines three striped patterns
// (24..26) with their colours, and builds the name table buffer skriin[]:
// every row is filled with its own row number and the stripe patterns
// 24,25,26 are placed every 6 columns.
// Each call uploads the row colours prepared by the previous call, uploads
// the name table starting at column offx (this scrolls the vertical
// stripes), rebuilds vert[] with 8 sine-driven horizontal bars, advances
// the sprite overlay fade and flashes the border colour whenever
// music_sync_a has changed since the previous call.
//
// Fix: the stripe placed at column 36 used to write its third cell to
// column 38, i.e. into the first cell of the next row and, for the last
// row, one byte past the end of skriin[]. That cell is now skipped.
void balkkar(void)
{
    static int first=1,tid=0,offx=0;
    int n,x,y;
    static unsigned char ms=0;
    static unsigned char flash_ticks = 0;

    if(first)
    {
        vdp_blank(); // blank out the screen while "uploading" data to screen memory
        for(n=0;n<24*8*3;n++)
        {
            DI;
            vdp_poke(n,0);
            EI;
        }

        for(n=0;n<3;n++)
        {
            DI;
            vdp_address(0x2000+24*8+n*256*8); // Vert. bar colors
            vdp_slowset(DARK_RED*16+RED,8);
            vdp_slowset(LIGHT_RED*16+WHITE,8);
            vdp_slowset(DARK_RED*16+RED,8);
            EI;
        }

        for(y=n=0;y<24;y++)
            for(x=0;x<38;x++,n++)
                skriin[n]=y;

        waitVB();
        DI;
        vdp_address(0x0000+24*8); // Patterns
        vdp_slowset(0xb4,8);
        vdp_slowset(0x42,8);
        vdp_slowset(0x2d,8);
        EI;

        for(y=0;y<24;y++) // And name table buffer
        {
            n=38*y;
            for(x=0;x<38;x+=6,n+=6)
            {
                skriin[n]=24;
                skriin[n+1]=25;
                if(x+2<38) // The last stripe of a row has room for two cells only
                    skriin[n+2]=26;
            }
        }

        ms = music_sync_a;
        first=0;
        waitVB();
        vdp_unblank();
    }

    // Row colours: the first 64 bytes of each of the three colour table
    // blocks belong to the rows of that third of the screen
    vdp_address(0x2000);
    vdp_copy(vert,64);
    vdp_address(0x2000+256*8+64);
    vdp_copy(&vert[64],64);
    vdp_address(0x2000+256*8*2+128);
    vdp_copy(&vert[128],64);

    vdp_address(0x1800);
    copywmodulo(&skriin[offx]);

    // Background bars
    clry();
    for(n=0;n<8;n++)
    {
        y=96+((sini[(tid+(n<<4))&255])>>1);
        balkcpy(&vert[y]);
    }

    fadeoverlay(0);

    offx=(offx+1 + ((music_sync_b & 1) << 2) )%6; // change of direction
    tid = (tid + 1 + (((music_sync_a) & 1) << 2)) & 0xff; // change of speed
    if (music_sync_a - ms)
        flash_ticks = 6;
    if (flash_ticks)
    {
        switch (flash_ticks)
        {
            case 6: vdp_register(VDP_COLOR,WHITE); break;
            case 5: vdp_register(VDP_COLOR,LIGHT_YELLOW); break;
            case 4: vdp_register(VDP_COLOR,LIGHT_RED); break;
            case 3: vdp_register(VDP_COLOR,GRAY); break;
            case 2: vdp_register(VDP_COLOR,RED); break;
            case 1: vdp_register(VDP_COLOR,DARK_RED); break;
        }
        flash_ticks --;
    }
    else
        vdp_register(VDP_COLOR,BLACK);
    ms = music_sync_a;

    // The old version without music sync stepped offx by 5 and tid by 2.
} // Balkkar

/* Copy a linear bitmap to the pattern table */
// bmap: address of the top-left byte of the area to copy, inside a bitmap
//       that is 32 bytes (256 pixels) wide
//
// Same idea as kopsaa(), but for 32 x 5 character cells (256 x 40 pixels,
// 1280 bytes) and without any nops between the writes: for each of the 5
// cell rows and each of the 32 byte columns, the 8 bytes bmap[col],
// bmap[col+32] .. bmap[col+224] are sent to port 0x98. After 32 columns the
// source pointer is moved 7*32 further. The VRAM write address must have
// been set by the caller.
// The argument is read from 6(ix) because of the extra "push ix". IY is
// saved and restored but not used.
void bitblit(unsigned char *bmap)
{
    bmap;
    _asm
    push    ix
    ld      ix,#0
    add     ix,sp
    push    iy
    push    bc
    push    de
    push    hl

    ld      l,6(ix)
    ld      h,7(ix)
    ld      de,#32
    ld      c,#5

$8: ld      b,#32

$9: scf
    ccf
    push    hl
    ld      a,(hl)
    adc     hl,de
    out     (0x98),a
    ld      a,(hl)
    adc     hl,de
    out     (0x98),a
    ld      a,(hl)
    adc     hl,de
    out     (0x98),a
    ld      a,(hl)
    adc     hl,de
    out     (0x98),a

    ld      a,(hl)
    adc     hl,de
    out     (0x98),a
    ld      a,(hl)
    adc     hl,de
    out     (0x98),a
    ld      a,(hl)
    adc     hl,de
    out     (0x98),a
    ld      a,(hl)
    adc     hl,de
    out     (0x98),a

    pop     hl
    inc     hl
    djnz    $9

    push    de
    scf
    ccf
    ld      de,#7*32
    adc     hl,de
    pop     de
    dec     c
    jp      nz,$8

    pop     hl
    pop     de
    pop     bc
    pop     iy
    pop     ix
    _endasm;
}

// Clear the bitmap fast
//
// dest: start of the area to clear
// c:    loaded into register C but never used: "sub a" sets A to 0 and A
//       is what gets stored, so the area is always filled with zeros.
//       NOTE(review): kube() passes a dither pattern here - confirm
//       whether ignoring it is intended.
//
// Stores 32*5 = 160 rounds of 8 bytes, i.e. 1280 zero bytes starting at
// dest. BC, HL and IX are saved and restored; A is not.
void clearasil(unsigned char *dest,unsigned char c)
{
    dest;c;
    _asm
    push    ix
    ld      ix,#0
    add     ix,sp
    push    bc
    push    hl

    ld      l,6(ix)
    ld      h,7(ix)
    ld      c,8(ix)
    ld      b,#32*5
    sub     a

$10:
    ld      (hl),a
    inc     hl
    ld      (hl),a
    inc     hl
    ld      (hl),a
    inc     hl
    ld      (hl),a
    inc     hl
    ld      (hl),a
    inc     hl
    ld      (hl),a
    inc     hl
    ld      (hl),a
    inc     hl
    ld      (hl),a
    inc     hl

    djnz    $10

    pop     hl
    pop     bc
    pop     ix
    _endasm;
}

// Animation data defined outside this file; kube() reads it in blocks of
// 256 bytes per frame
extern unsigned char silu[]; // Animation

// This should be redone, draws the silhouette
//
// bytes: 256 source bytes, one per pixel column (32 bytes x 8 bits)
//
// Draws into bitmap[], treated as 32 bytes per row. For every column:
//   - bits 0..6 give a row number; if it is non-zero, the pixel of this
//     column in that row is set;
//   - if bit 7 is set, a vertical line is drawn first, starting at that row
//     and running for 64-row rows (label $13), and then the pixel case
//     above is handled as well.
// NOTE(review): the line length is computed as 64-row in an 8-bit
// register, so this assumes row < 64 whenever bit 7 is set - confirm
// against the data.
//
// Register use: in the main set DE is the source pointer, B counts the 32
// byte columns and L temporarily keeps the source byte during a vertical
// line. In the alternate set (EXX) C' is the pixel mask, rotated right once
// per column (the carry produced by the rotation after the 8th column ends
// the inner loop), and HL' points to the current byte column of the top
// row. "scf / ccf" clears the carry so that the ADC HL chains compute
// row*32 + column address.
// BC, HL, DE and IX of the main set are saved and restored; the alternate
// registers BC', DE' and HL' are modified and not restored.
void fastsilu(unsigned char *bytes)
{
    bytes;
    _asm
    push    ix
    ld      ix,#0
    add     ix,sp
    push    bc
    push    hl
    push    de

    ld      e,6(ix)
    ld      d,7(ix)
    ld      b,#256/8

    exx
    ld      c,#0x80
    ld      hl,#_bitmap
    exx

$12:
    ld      a,(de)
    inc     de
    bit     #7,a
    jr      nz,$13  ; Vert line too?
$14:
    and     #0x7f
    jp      z,$11   ; No line here

    exx             ; Plot a pixel
    ld      d,#0
    ld      e,a
    ex      de,hl
    scf
    ccf
    adc     hl,hl
    adc     hl,hl
    adc     hl,hl
    adc     hl,hl
    adc     hl,hl
    adc     hl,de
    ld      a,c
    or      (hl)
    ld      (hl),a
    ex      de,hl
    exx

$11:
    exx             ; Next column
    rrc     c
    exx
    jp      nc,$12

    exx
    inc     hl
    exx
    djnz    $12
    jp      $15

$13:
    ld      l,a     ; Draw a vertical line
    and     #0x7f
    exx
    ld      d,#0
    ld      e,a
    ex      de,hl
    scf
    ccf
    adc     hl,hl
    adc     hl,hl
    adc     hl,hl
    adc     hl,hl
    adc     hl,hl
    adc     hl,de

    ld      b,a
    ld      a,#64
    sub     b
    ld      b,a

    push    de
    ld      de,#32
    scf
    ccf

$16:
    ld      a,c
    or      (hl)
    ld      (hl),a
    adc     hl,de
    djnz    $16

    pop     de
    ex      de,hl
    exx
    ld      a,l
    jp      $14

$15:
    pop     de
    pop     hl
    pop     bc
    pop     ix
    _endasm;
}

// Set pixel (x,y) in bitmap[] treated as 32 bytes per row. Only referenced
// by the commented-out C version of the drawing loop in kube(). The
// expansion already ends in a semicolon.
#define pixon3(x,y) bitmap[((x)>>3)+(y)*32]|=0x80>>((x)&7);

// The silhouette animation ("kube"); call once per frame.
//
// The first call clears the three pattern table blocks, sets up the name
// table (patterns 0..255 for the top 8 character rows, then the patterns
// 224..255 repeated on the remaining 16 rows), fills the colour table with
// white on black and clears the work bitmap.
// Each call flashes the border when music_sync_a has changed, uploads part
// of the work bitmap to the pattern table, clears the drawing area and
// draws the next animation frame into it with fastsilu().
void kube(void)
{
    int n;
    // NOTE(review): page is never used in this function
    static int frame=0,first=1,page=0;
    unsigned char *p;

    // Value of music_sync_a seen by the previous call, and the remaining
    // steps of the border flash
    static unsigned char ms;
    static unsigned char flash_ticks = 0;

    if(first)
    {
        for(n=0;n<256*8*3;n++)
        {
            DI;
            vdp_poke(n,0);
            EI;
        }
        for(n=0;n<32*8;n++)
        {
            DI;
            vdp_poke(0x1800+n,n&255);
            EI;
        }
        for(n=0;n<32*16;n++) // Repeat row in bottom
        {
            DI;
            vdp_poke(0x1800+32*8+n,(n&31)+32*7);
            EI;
        }
        //waitVB();
        //DI;
        //vdp_address(0x2000);
        //vdp_set(BLACK*16+WHITE,256*8*3);
        //EI;
        for(n=0;n<256*8*3;n++)
        {
            DI;
            vdp_poke(0x2000+n,BLACK*16+WHITE);
            EI;
        }
        //DI;
        //vdp_address(0);
        //vdp_set(0,3*32*8);
        //EI;
        
        memset(bitmap,0,32*8*8);

        waitVB();
        first=0;
        ms = music_sync_a;
    }

    // Flash: a change of music_sync_a starts a 4-step colour ramp on
    // register VDP_COLOR; otherwise the register is set to WHITE
    if (music_sync_a - ms) 
            flash_ticks = 4;
    if (flash_ticks)
    {
            switch (flash_ticks)
            {
                    case 4: vdp_register(VDP_COLOR,LIGHT_RED); break;
                    case 3: vdp_register(VDP_COLOR,DARK_YELLOW); break;
                    case 2: vdp_register(VDP_COLOR,RED); break;
                    case 1: vdp_register(VDP_COLOR,DARK_RED); break;
            }
            flash_ticks --;
    }
    else
            vdp_register(VDP_COLOR,WHITE);
    ms = music_sync_a;

    /*
        I have to elaborate a bit. This piece here is to slow down the code.
        It feels bad but it's necessary because we don't have two pages
        and need to synchronize to the screen output :( Probably will break
        if SDCC improves someday.
    */
    // The length of this delay depends on the code the compiler generates
    // for the empty loop; do not restructure it without re-timing.
    for(n=0;n<700;n++)
        ;

    // Upload 5 character rows starting at bitmap row 20, then clear the
    // 1280 bytes starting at bitmap row 24 (clearasil() always clears to
    // zero, see there)
    vdp_address(32*8*3);
    bitblit(&bitmap[32*8*2 +32*4]);
    clearasil(&bitmap[32*8*3],0xaa>>(frame&1));

    // Draw the current frame; each frame takes 256 bytes of silu[]
    p=&silu[frame*256];
    fastsilu(p);
    /*for(n=0;n<256;n++)
    {
        i=*p++;
        if(i&0x7f)
            pixon3(n,i);
        if(i&0x80)
        {
            for(j=i&0x7f;j<60;j++)
                pixon3(n,j);
        }
    }*/

    // Next frame (32 frames, wrapping). Bit 0 of music_sync_b and of
    // music_sync_t each subtract 2 from the step of +1.
    frame=(frame+1 - ((music_sync_b & 1) << 1) - ((music_sync_t & 1) << 1)) & 31;

    // frame=(frame+1)&31;
}

// Reads a file to memory. Needs fixing - dest doesn't work??
//
// name: 11 bytes that are copied into the file control block (FCB)
//       starting at its offset 1
//       NOTE(review): presumably the space-padded 8+3 file name of the
//       CP/M style FCB - confirm.
// dest: currently ignored; the loads from 8(ix)/9(ix) are commented out
//       and the data always goes to bitmap[].
//
// Steps: clear the 40-byte FCB embedded in the code at label $21, copy the
// name in, call the system entry at address 5 with C=0x0f (open), zero the
// FCB bytes 12, 13 and 32, then repeat the call with C=0x14 (read) and copy
// 128 bytes from address 0x0080 to the destination for as long as the call
// returns A=0. Finally the call with C=0x10 (close) is made.
// NOTE(review): 0x0f / 0x14 / 0x10 look like the BDOS functions open file,
// sequential read and close file, with 0x0080 as the default transfer
// buffer - confirm against the DOS documentation.
// The result of the open call is not checked, and there is no limit on the
// amount of data written to the destination.
void readfile(char *name,unsigned char *dest)
{
    name;dest;
    _asm
    push    ix
    ld      ix,#0
    add     ix,sp
    push    de
    push    hl
    push    bc

    ld      de,#$21
    ld      b,#40   ; Clear FCB
    xor     a
$25:
    ld      (de),a
    inc     de
    djnz    $25

    ld      de,#$21+1 ; Copy name
    ld      l,6(ix)
    ld      h,7(ix)
    ld      bc,#11
    ldir

    ld      de,#$21
    ld      c,#0xf   ; Open file
    call    5

    xor     a
    ld      ($21+12),a
    ld      ($21+13),a
    ld      ($21+32),a

    ;ld      e,8(ix)
    ;ld      d,9(ix)
    ld      de,#_bitmap ; Ok, just this time

$23:
    push    de
    ld      de,#$21
    ld      c,#0x14
    call    5
    pop     de
    or      a
    jr      nz,$24
    ld      hl,#0x80
    ld      bc,#128
    ldir
    jp      $23

$24:
    ld      de,#$21
    ld      c,#0x10
    call    5
    jp      $22

$21:
    .db     0,0,0,0,0, 0,0,0,0,0
    .db     0,0,0,0,0, 0,0,0,0,0
    .db     0,0,0,0,0, 0,0,0,0,0
    .db     0,0,0,0,0, 0,0,0,0,0
$22:
    pop     bc
    pop     hl
    pop     de
    pop     ix
    _endasm;
}

// Prepare a 1-bitplane sprite
//
// data:   4*3*32 bytes of sprite patterns, or 0 to leave the patterns
//         untouched
// offset: number of the first sprite pattern / sprite to use
//
// Sets the VRAM address to pattern "offset" of the table at 0x3800, uploads
// the patterns when data is given, and colours the 12 sprites starting at
// "offset" black.
void sprite1bit(char *data,int offset)
{
    int sprite;

    DI;
    vdp_address(0x3800+offset*32);
    EI;

    if(data)
        vdp_slowcopy(data,4*3*32);

    sprite=offset;
    while(sprite<offset+12)
    {
        DI;
        sprite_col(sprite,BLACK);
        EI;
        sprite++;
    }
}

// Prepare a sprite and hide it
//
// data:  sprite data - two colour bytes followed by the patterns - or 0 to
//        just move all 32 sprites off screen
// right: non-zero places the overlay 192 pixels from the left edge
//
// With data given, the sprite patterns in VRAM are cleared (the picture is
// brought in later, line by line, by fadeoverlay()), cursprite is pointed
// at the pattern data, the first 24 sprites are laid out as a grid two
// sprites wide with 32 pixel spacing, and their colours are taken
// alternately from data[0] and data[1]. The sprites 24..31 are parked at
// y=192.
//
// Fix: data==0 is an accepted input (see the position loop), but the
// colours were still read from data[0]/data[1] and cursprite was set to an
// address derived from the null pointer. With data==0 the colours are now
// left alone and cursprite is reset to 0.
void spriteput(char *data,unsigned char right)
{
    int n;

    if(right)
        right=192;

    vdp_address(0x3800);
    if(data!=0)
        vdp_slowset(0,4*6*32);

    cursprite=(data!=0) ? &data[2] : 0;

    waitVB();
    for(n=0;n<32;n++)
    {
        DI;
        if(data==0 || n>=24)
            sprite_pos(n,0,192);
        else
            sprite_pos(n,right+(n/2&1)*32,n/4*32);
        EI;
    }

    if(data==0)
        return;

    for(n=0;n<24;n++)
    {
        DI;
        sprite_col(n,data[n&1]);
        EI;
    }
}

// Optional hook run from the interrupt handler: my_isr() calls it on every
// interrupt while it is non-zero. main() changes it with interrupts
// disabled (DI/EI) to switch between no hook, flashing_handler and balkkar.
void (*vbrun)(void)=0;

// Vertical-blank interrupt handler, installed by main() via install_isr().
// Order per interrupt: read port 0x99 (presumably the VDP status read that
// acknowledges the interrupt - confirm), advance the music, run the
// optional vbrun hook, bump the frame counter, re-enable interrupts.
void my_isr(void) interrupt
{
    in(0x99);
    music_play();

    if(vbrun)
        (*vbrun)();

    ++cunter;
    EI;
}

// Show the picture held in `bitmap`: the first 32*24*8 bytes are the
// pattern data (VRAM 0x0000), the following 32*24*8 bytes the colour data
// (VRAM 0x2000). Colours are revealed column by column, left to right.
void show_bitmap_nicely(void)
{
    int col, row, vaddr;
    unsigned char *src;

    // Zero the colour table first so less rubbish is visible while the
    // patterns are being uploaded.
    DI;
    vdp_blank();
    vdp_address(0x2000);
    vdp_slowset(0,32*24*8);
    vdp_unblank();
    EI;

    waitVB();

    // Upload all pattern data in one go.
    DI;
    vdp_address(0x0000);
    vdp_slowcopy(bitmap,32*24*8);
    EI;

    // Fade in: copy the colour bytes of one character column (8 bytes for
    // each of the 24 rows, 256 bytes apart) and pause for a vertical blank
    // after every second column.
    for(col=0;col<32;col++)
    {
        if(col&1)
            waitVB();

        vaddr=0x2000+col*8;
        src=&bitmap[32*24*8+col*8];
        for(row=0;row<24;row++,vaddr+=256,src+=256)
        {
            DI;
            vdp_address(vaddr);
            vdp_slowcopy(src,8);
            EI;
        }
    }
}

// Wipe the picture away by overwriting the colour table (VRAM 0x2000) with
// `color`, one character column per vertical blank, left to right.
//
//   color - colour-table byte written to all 8 lines of every character
//
// Only VRAM is touched; the unused source pointer that used to be computed
// into (and past the end of) `bitmap` has been removed.
void fade_out_nicely(char color)
{
    int n, x, y;

    for (x=0; x<32; x++)
    {
        waitVB();
        n = 0x2000 + (x << 3);      // colour bytes of column x, top row
        for (y=0; y<24; y++)
        {
            DI;
            vdp_address(n);
            vdp_slowset(color,8);
            EI;
            n += 256;               // same column, next character row
        }
    }
} // fade_out_nicely

// Replace whatever is in VRAM with a uniform, known screen:
//   - the first 0x800 bytes of pattern data (256 characters) are zeroed,
//   - all 0x300 name table entries are set to character 0xFF,
//   - the 8 colour bytes of character 0xFF are zeroed.
// The display is switched off while this happens and switched back on
// after the next vertical blank.
void put_some_nongarbage_on_screen()
{
    vdp_blank();

    // Pattern data at 0x0000
    DI;
    vdp_address(0x0000);
    vdp_slowset(0,256*8);
    EI;

    // Name table at 0x1800
    DI;
    vdp_address(0x1800);
    vdp_slowset(255,32*24);
    EI;

    // Colour bytes of character 255 at 0x2000
    DI;
    vdp_address(0x2000+255*8);
    vdp_slowset(0,8);
    EI;

    waitVB();
    vdp_unblank();
}

// vbrun hook: flashes the VDP colour register in time with the music.
// The colour follows music_channel_0_level:
//   >= 0xE0 -> RED, >= 0xD0 -> LIGHT_RED, anything lower -> WHITE.
// Installed by main() while the first effect is initialising.
void flashing_handler()
{
    if(music_channel_0_level>=0xE0)
    {
        vdp_register(VDP_COLOR,RED);
        return;
    }

    if(music_channel_0_level>=0xD0)
    {
        vdp_register(VDP_COLOR,LIGHT_RED);
        return;
    }

    vdp_register(VDP_COLOR,WHITE);
}

// Demo entry point.
//
// Flow:
//   1. Set up screen 2 and the sprites, load and show the title picture,
//      start the music interrupt.
//   2. Wait for music sync T to reach 0x9 (or space), wipe the picture.
//   3. Main loop, one pass per vertical blank, picks the effect from the
//      music sync counters: tunnel (T 0x9..0xC), colour bars (0xD..0x11),
//      circles (0x12..0x14), blind + greetings (0x15..0x19), cubes (0x1A+).
//      Space or T>=0x23 (after a few more frames) ends the loop.
//   4. Fade out, stop the music, load and show the end picture, return to
//      screen 0.
//
// Each f_* flag is 1 until the corresponding effect has done its one-time
// setup; `sfade` is set once the overlay fade-out of the current effect has
// been started.
void main(void)
{
    int quit=0,circinit=0,
        n,i,j,x,y,n2,
        counter=0,offi=0,copyoff=0;
    char f_circle=1,f_blind=1,f_tunnel=1,f_kube=1,mode=0,
         *curlogo=logogreet,sfade=0;

    screen(2);
    vdp_register(VDP_COLOR,BLACK);
    vdp_register(VDP_MODE1,MODE1_IE+MODE1_VRAM+MODE1_BLANK+
                 MODE1_SPRITE_SZ+MODE1_SPRITE_MAG);

    // Common to all the sprites: sprite n uses pattern n*4 and starts out
    // at y=208 (presumably the VDP's "end of sprite list" value, i.e.
    // hidden - confirm). Both calls belong inside the loop; without the
    // braces sprite_pos() ran only once, for the non-existent sprite 32.
    DI;
    for(n=0;n<32;n++)
    {
        sprite_pat(n,n*4);
        sprite_pos(n,0,208);
    }
    EI;

    // Read the first img
    readfile("atsaas  sc2",bitmap); // Yes it needs to be like this

    // Some inits meanwhile
    music_init();
    install_isr(my_isr); // Music starts playing NOW!!

    waitVB();
    show_bitmap_nicely();

    memset(sqrtt,255,2624);

    tunnel_init();
    blind_init();

    // 1st "part": just show the picture until the music gets going
    counter=0;
    while(music_sync_t<0x9 && !quit)
    {
        waitVB();
        if(space())
            quit=1;
    }

    fade_out_nicely(0xFF);

    // some flashing while running the initialization part
    DI;
    vbrun = flashing_handler;
    EI;

    waitVB();

    // Duplicate pattern table
    vdp_register(VDP_PATT_T,0);

    // Something nicer than random garbage
    put_some_nongarbage_on_screen();

    waitVB();
    vdp_blank(); // blank screen. unblank when all necessary stuff is on the screen

    // The main loop
    while(!space() && !quit)
    {
        waitVB();

        // Tunnel(ish)
        // The A part of the song starts in music sync position T=A, but we start running the tunnel part in advance.
        // While the tunnel part is initializing, the screen is blanked, and the VBI hook flashes the background color
        // in rhythm with the music.
        if(music_sync_t>=0x9 && music_sync_t<0xd)
        {
            if (music_sync_t==0xA) // stop flashing when music's A part starts
            {
                DI;
                vbrun = 0; // clear function pointer
                EI;
            }

            if(f_tunnel && music_sync_t >= 0xA)
            {
                vdp_unblank(); // NOW the screen and sprite subsystem are activated again
                vdp_register(VDP_COLOR,WHITE);
                spriteput(spitex,0);
                fadeoverlay(FADEIN);
                f_tunnel=0;
            }
            tunnel((music_sync_t >= 0xA), (music_sync_t >= 0xC) && (music_sync_b >= 0xE));
            fadeoverlay(0);
            if(music_sync_a>=0x1B && !sfade)
            {
                sfade=1;
                fadeoverlay(FADEOUT);
            }
        }

        // Colorbars
        if(music_sync_t>=0xd && music_sync_t<0x12)
        {
            if(circinit==0) // Multitasking!
            {
                vdp_register(VDP_COLOR,BLACK);
                spriteput(spitex6,1);
                fadeoverlay(FADEIN);
                sfade=0;

                // From here on the bars are drawn by the interrupt hook,
                // leaving the main thread free to precalculate the circles.
                balkkar();
                DI;
                vbrun=balkkar;
                EI;
                circles_init();
                circinit=1;
            }
            if(music_sync_b>=0x16 && !sfade)
            {
                sfade=1;
                fadeoverlay(FADEOUT);
            }
        } // Colorbars

        // Circles
        if(music_sync_t>=0x12 && music_sync_t<0x15)
        {
            // Stop the colour bars hook
            if(vbrun!=0)
            {
                DI;
                vbrun=0;
                EI;
            }
            if(f_circle)
            {
                vdp_blank();
                circles(0); // first call initializes the part
                waitVB();
                vdp_unblank();

                vdp_register(VDP_COLOR,RED);
                spriteput(spitex4,0);
                fadeoverlay(FADEIN);
                sfade=0;

                f_circle=0;
            }
            circles(music_sync_b >= 0x1C);
            fadeoverlay(0);
            if(music_sync_a>=0x37 && !sfade) // Possible locations for fade: A:37, B:1C, A:38
            {
                sfade=1;
                fadeoverlay(FADEOUT);
            }
        } // Circles

        // Blind + greets
        if(music_sync_t>=0x15 && music_sync_t<0x1a)
        {
            if(f_blind)
            {
                vdp_register(VDP_COLOR,WHITE);
                spriteput(0,0);
                f_blind=0;

                waitVB();
                for(n=0;n<12;n++)
                {
                    DI;
                    sprite_col(n,BLACK);
                    EI;
                }

                // Precalc flight paths into `flight` (an alias of bitmap):
                // STEPS frames, each 12 sprites * (y,x) = 24 bytes.
                // x sways with the sine table; y rises from the bottom
                // during the first 64 steps, holds at 70, and sinks again
                // during the last 64.
                for(n=n2=0;n<STEPS;n++,n2+=24)
                {
                    x=64+(sini[n*4&255]>>1);
                    y=70;
                    if(n<64)
                        y=196-sini[n];
                    if(n>=STEPS-64)
                        y=196-sini[STEPS-n];

                    // 4x3 grid of sprites, 32 pixels apart
                    for(i=0;i<12;i++)
                    {
                        flight[n2+(i<<1)+1]=x+((i&3)<<5);
                        j=y+((i>>2)<<5);
                        if(j>192)
                            j=192;
                        flight[n2+(i<<1)]=j;
                    }
                }
            }
            if(mode)
            {
                // mode 1: fly the current logo along the precalculated path
                setsprites12(&flight[offi]);
                offi+=12*2;
                if(offi>=STEPS*12*2)
                {
                    offi=0;
                    mode=0;
                }
                blind();
            }
            else
            {
                // mode 0: upload the next logo, 16 bytes per frame
                blind();
                vdp_address(0x3800+copyoff);
                vdp_slowcopy(&curlogo[copyoff],16);
                copyoff+=16;
                if(copyoff==12*32)
                {
                    // Upload complete: fly it, and line up the next logo
                    mode=1;
                    copyoff=0;
                    if(curlogo==logogreet) {curlogo=logobw; goto switchimuka;}
                    if(curlogo==logobw) {curlogo=logob; goto switchimuka;}
                    if(curlogo==logob) {curlogo=logohede; goto switchimuka;}
                    if(curlogo==logohede) {curlogo=logorno; goto switchimuka;}
                    if(curlogo==logorno) {curlogo=logowam; goto switchimuka;}
                    if(curlogo==logowam) {curlogo=logozen; goto switchimuka;}
                    if(curlogo==logozen) {curlogo=bitmap; goto switchimuka;}
                    if(curlogo==bitmap) mode=0; // Bogus value, last text
                    switchimuka:;
                }
            }
        }

        // Cubes
        if(music_sync_t>=0x1a)
        {
            if(f_kube)
            {
                vdp_blank();
                kube(); // first call = initialization
                waitVB();
                vdp_unblank();

                vdp_register(VDP_COLOR,WHITE);
                spriteput(spitex5,1);
                fadeoverlay(FADEIN);
                sfade=0;

                f_kube=0;
            }
            kube();
            fadeoverlay(0);
            if(music_sync_t>=0x22 && !sfade)
            {
                sfade=1;
                fadeoverlay(FADEOUT);
            }
        }

        // End of the song: run a few more frames, then leave the loop
        if(music_sync_t>=0x23)
        {
            if(counter>5)
                quit=1;
            counter++;
        }
    }

    // Simple fade out: make character 255 an empty black cell, then fill
    // the name table with it row by row from the bottom, one row every
    // two frames.
    for(n=23;n>=0;n--)
    {
        waitVB();
        waitVB();
        if(n==23)
        {
            vdp_address(255*8);
            vdp_slowset(0,8);
            vdp_address(0x2000+255*8);
            vdp_slowset(BLACK,8);
            vdp_address(0x2000+255*8+256*8);
            vdp_slowset(BLACK,8);
            vdp_address(0x2000+255*8+256*8*2);
            vdp_slowset(BLACK,8);
        }
        for(i=0;i<32;i++)
        {
            vdp_poke(0x1800+n*32+i,255);
        }
    }

    uninstall_isr();
    music_stop_sound();

    waitVB();
    spriteput(0,1);
    vdp_register(VDP_COLOR,BLACK);
    vdp_blank();

    // And the endpic loader
    readfile("endpic  sc2",bitmap); // Yes it needs to be like this
    DI;
    vdp_register(VDP_PATT_T,3);
    // Name table entries 0,1,2,... so every cell shows its own character
    for(n=0;n<256*3;n++)
        vdp_poke(0x1800+n,n);
    EI;

    show_bitmap_nicely();

    // Keep the end picture up for 20*50 frames or until space is pressed
    for(n=0;n<20*50;n++)
    {
        if(space())
            break;
        waitVB();
    }

    vdp_register(VDP_COLOR,DARK_BLUE);
    fade_out_nicely(0);

    screen(0);
}
