#!/usr/bin/perl
# 
# Copyright © 2015-2020 by Vincent Slyngstad
# 
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
# 
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS LISTED ABOVE BE LIABLE
# FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# 
# Except as contained in this notice, the names of the authors above
# shall not be used in advertising or otherwise to promote the sale, use
# or other dealings in this Software without prior written authorization
# from those authors.
#
# Explode the contents of a PQS8 file-system into the 
# various files it contains.
# BUGBUG: We aren't yet writing XML instructions to
# facilitate reconstructing the media image.

#
# Not much is researched yet about dates in PQS8.  I hope
# to be able to extract the date from the keyboard monitor 
# image in blocks 001-017, and eventually at least 
# use that to set the timestamps of the extracted files.
# Meanwhile, this date stuff is just cruft.
#
# The date algorithm stores an epoch, multiplied by 8 and 
# added to 1960.  First, get the right epoch.
# BUGBUG: There's also an epoch on the media, which is the
# one we actually end up using.
($_, $_, $_, $dy, $mo, $i) = localtime(time);
$i = ($i + 1900) - 1960;
$epoch = int($i/8);
$cyear = $i & 07;
$i += 60;
@month = ("0",  "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL",
	 "AUG", "SEP", "OCT", "NOV", "DEC", "13",  "14",  "15");
#print " $dy-$month[$mo+1]-$i\n\n";

#
# Open the disk image to be examined.  Media specific tools are 
# used to convert media into simple 12 bit words, zero extented 
# to 16 bits, and stored in two bytes in Perl's packed 'S' format.
# Consecutive words form blocks, and blocks are stored in logical 
# order, not the physical order of the original media.
open(INPUT, $ARGV[0]) || die "$ARGV[0]: $!";
binmode(INPUT);

#
# Check for OS/8 or COS, and bail if this doesn't appear to be
# a PQS8 image..
# Read first OS/8 directory segment
$link = 001; $fsbase = 0; $bsize = 2*0400;
seek(INPUT, $bsize*($fsbase+$link), 0) || die "seek($dsk): $!";
read(INPUT, $buf, $bsize) || die "read($dsk): $!";
@blk = unpack("S512", $buf);
$nent = shift @blk;
$sblk = shift @blk;
# If it has files in the system area, it's not a system device.
$sys = 0 if $sblk < 070; # Not a system image
$cos = 0 if $sblk < 0140; # Not a COS image
$nent = 010000 - $nent;
$link = shift @blk;
$tent = shift @blk;
$aiw = shift @blk;
$aiw = 010000 - $aiw unless $aiw == 0;
die "$ARGV[0] looks like OS/8\n" if $sblk == 0007;
die "$ARGV[0] looks like OS/8\n" if $sblk == 0070;
die "$ARGV[0] looks like COS\n" if $sblk == 0140;

#
# Make sure the output directory exists.
$fsname = $ARGV[0]; $fsname =~ s/[.]dsk$//;
$fsname .= ".0";
mkdir $fsname unless -d $fsname;

#
# PQS8 stores the version name, epoch, etc. in block 001.
# We look that up here, in case we need it later
# to accomodate changes that were made in where 
# to find things, etc.
$link = 001;
seek(INPUT, $link*0400, 0) || die "$ARGV[0] directory seek: $!";
read(INPUT, $dir, 1*0400) || die "$ARGV[0] directory read: $!";
# 0400 bytes = 128 words
@dir = unpack("S128", $dir);
# It's at offset 012 or offset 020, depending on the
# version.  We don't know the version yet, so we don't
# know where to look.
$epoch = $dir[011];
$n1 = $dir[012];
$n1 = $dir[020] if (($n1>>6)&~1) != 070;
if ((($n1>>6)&~1) != 070) {
    print "$ARGV[0] is not bootable PQS8\n\n";
}
@name = ($n1>>6, $n1&077);
grep($_ = ($_ >= 040? $_ : $_ + 0100), @name, @ext);
$version = pack("CC", @name);
print "$ARGV[0] is PQS8 V$version\n\n";

#
# The CATalog lives in blocks 015 and 016 (which is 
# loaded during boot).
$link = 015;
# Read in the CATalog (two 0400 byte blocks).
seek(INPUT, $link*0400, 0) || die "$ARGV[0] directory seek: $!";
read(INPUT, $dir, 2*0400) || die "$ARGV[0] directory read: $!";
# 2*0400 = 01000 bytes = 0400 (256) words
@dir = unpack("S256", $dir);
#
# The last 3 words are magic.
$start = $dir[255];
$start = pop(@dir);
$end = pop(@dir);
$_ = pop(@dir); # Remove funky redundant count
$slots = int(($end-$start) / 16);
if ($slots <= 0) {
    print "Not PQS8: CATalog corrupt\n\n";
    exit 1;
}
#
# Walk the CATalog, identifying files and extracting them.
$slot = $used = 0;
while (@dir) {
    $date = "";
    $n1 = shift @dir;
    $n2 = shift @dir;
    $n3 = shift @dir;
    last if $slot++ == $slots;
    if ($n1) {
	$length = 16; # Length of these is always 16x128 words.
	@name = ($n1>>6, $n1&077, $n2>>6, $n2&077, $n3>>6, $n3&077);
	grep($_ = ($_ >= 040? $_ : $_ + 0100), @name, @ext);
	$name = pack("CCCCCC", @name);
	$name =~ s/@//g;
        # Deleted files which have not been over-written have their
	# first two letters replaced with '??'.  This leads to name
	# collisions and lost data when the results are not unique
	# within the CATalog.  We work around that by tacking on the 
	# block number as an extension, for deleted files.
        $name .= ".$start" if $name =~ /^[?]/;
	$used++;
    } else {
	$length = 16;
	$name = "<empty>";
	$date = "";
    }
    &catfile($name, $start, $length, $date) if $n1;
#   printf "%-9s\t%04o\t%d\t%s\n", $name, $start, $length, $date if $n1;
    $start += $length;
}
#print "\nUsed $used of $slots slots\n";

#
# Here we go:
# At offset 03777 is the line number pointer, offset by 03000.
# At offset 03776 is the text pointer, offset by 03000.
# The file is full when these collide.
# Line number entries are two words each, moving down from 03775.
# The odd addressed word is a line number.
# The even addressed word is a pointer to the text, offset by 03000.
# Actual text is packed in sixbit.
sub catfile {
    local($name, $start, $length, $date) = @_;
#   warn "catfile($name, $start, $length, $date);\n";
    die unless $length == 16;

    # The file is small, so just read in the whole thing.
    # 0400 is used here, since each word is two bytes.
    seek(INPUT, $start*0400, 0) || die "$ARGV[0] file seek: $!";
    # Read the media.  If that fails, it is usually because SIMH didn't
    # bother to write the uninitialized block.  Fabricate the empty blocks.
    read(INPUT, $buf, $length*0400) || ($buf = pack("S2048", 0));
    @buf = unpack("S2048", $buf);
    #
    # Check for a valid text file.
    $text = 1;
    $linenop = $buf[03777];
    $textp = $buf[03776];
    $text = 0 if $textp < 03000;
    $text = 0 if $textp > $linenop;
    $text = 0 if $linenop > 06777;
    $line = 0; # First line number must be > 0!
    $linenop -= 03000;
    # Check that the line numbers are monotonic.
    for ($linep = 03775; $linep > $linenop; $linep -= 2) {
	$text = 0 if $line >= $buf[$linep]; # Not Monotonic
	$text = 0 if $textp < $buf[$linep-1]; # Implausible text location
	$line = $buf[$linep];
    }
    # Open a file for our results.
    open(OUTPUT, ">$fsname/$name") || die "$fsname/$name: $!";
    binmode(OUTPUT);
    # If not a text file, just write the blocks to a file so we can look at them.
    if (!$text) {
        print OUTPUT $buf || die "file write: $!";
        return;
    }
    #
    # We've got ourselves a text file.  Unpack the beast.
    for ($linep = 03775; $linep > $linenop; $linep -= 2) {
        last if $buf[$linep] == 07777;
        printf OUTPUT "%4d ", $buf[$linep];
        for ($textp = $buf[$linep-1] - 03000; ; $textp++) {
	    @c = ($buf[$textp]>>6, $buf[$textp]&077);
	    grep($_ = ($_ >= 040? $_ : $_ + 0100), @c);
            last if $c[0] == 0100;
	    $c[0] = 011 if $c[0] == 0137;
	    print OUTPUT pack("C", $c[0]);
            last if $c[1] == 0100;
	    $c[1] = 011 if $c[1] == 0137;
	    print OUTPUT pack("C", $c[1]);
        }
        print OUTPUT "\r\n";
    }
}

#
# This is a table of file types for System DIRECTory.
@typ = (
  "          ", # 0
  "GENO  GENI", # 1
  "      BIN ", # 2
  "BIN   BIN ", # 3
  "BIN       ", # 4
  "ASC   ASC ", # 5
  "ASC       ", # 6
  "BIN   PAL ", # 7
  "PAL   PAL ", # 8
  "      BAT ", # 9
  "      DUMP", # 10
  "      FOC ", # 11
  "PAL       ", # 12
  "      PAL ", # 13
);

#
# System DIRECTory starts in block 063.
$link = 063;
#
# Read in the System DIRECTory (four 0400 byte blocks).
seek(INPUT, $link*0400, 0) || die "directory seek: $!";
read(INPUT, $dir, 4*0400) || die "directory read: $!";
# 4*0400 = 02000 bytes = 01000 (512) words
@dir = unpack("S512", $dir);
$slot = $used = 0;
$slots = 85;
#   print "\n Name\tBlock\tSize\tLoad\tLength";
#   print "\t Out   In" if $version gt '8T';
#   print "\tR S O\tStart\n";
$size = 0;
while (@dir) {
    # Each entry is six words long.
    (@meta) = ($dir[0], $dir[1], $dir[2], $dir[3], $dir[4], $dir[5]);
    $n1 = shift @dir;
    $n2 = shift @dir;
    $n3 = shift @dir;
    $start = shift @dir; # Word 4
    $laddr = shift @dir; # Word 5
    $length = $laddr & 037;
    $files = !!($laddr & 0100);
    $expr = !!($laddr & 040);
    $laddr &= 07600;
    $saddr = shift @dir; # Word 6
    $size = $dir[3] - $start unless $start == $dir[3];
    $size = $length if $dir[3] == 0;
    $ifiles = !!($saddr & 0100);
    # See "Figure 1" for meaning of $typ.
    $typ = $typ[$saddr & 037];
    $typ = "         " unless defined $typ;
    $saddr &= 07600 if $version gt '8T';
    if ($n1) {
	@name = ($n1>>6, $n1&077, $n2>>6, $n2&077, $n3>>6, $n3&077);
	grep($_ = ($_ > 040? $_ : $_ + 0100), @name, @ext);
	$name = pack("CCCCCC", @name);
	$name =~ s/@//g;
	$used++;
    } else {
	$name = "<empty>";
	next;
    }
    # Open a file for our results.
    open(OUTPUT, ">$fsname/$name.sd") || die "$fsname/$name: $!";
    binmode(OUTPUT);
    # Write the directory entry here too, as it has important metadata
    # that isn't available elsewhere.
    print OUTPUT pack("S6", @meta);
    # The file is small, so just read in the whole thing.
    seek(INPUT, $start*0400, 0) || die "$ARGV[0] file seek: $!";
    read(INPUT, $buf, $length*0400) || die "$ARGV[0] file read: $!";
    # Write the file.
    print OUTPUT $buf || die "file write: $!";

#   printf "%-6s\t%04o\t%03o\t%04o\t  %02o",  $name, $start, $size, $laddr, $length;
#   printf "\t%s", $typ if $version gt '8T';
#   printf "\t%o %o %o\t%05o\n", $expr, $files, $ifiles, $saddr;
    $start += $length;
}
#print "\nUsed $used of $slots slots\n\n";

# BUGBUG TODO
sub writefile {
    local($first, $last, $name) = @_;

    # Open a file for our results.
    open(OUTPUT, ">$fsname/$name") || die "$fsname/$name: $!";
#warn "writefile($first, $last, '$name')\n";
    binmode(OUTPUT);
    # Copy out the file.  These are binary, so leave the words in DSK format.
    for (; $first <= $last; $first++) {
      # Use 0400 here, since each word is two bytes.
      seek(INPUT, $first*0400, 0) || die "$ARGV[0] file seek: $!";
      read(INPUT, $buf, 0400) || die "$ARGV[0] file read: $!";
      # Write the file.
      print OUTPUT $buf || die "file write: $!";
    }
}

#
# Now do the reserved system areas:
# Block		Use
# 0000-0000	BOOTSTRAP, SYSTEM I/O, ETC.
# 0001-0013	KEYBOARD MONITOR
# 0014-0014	THE "EDIT" COMMAND
# 0015-0016	USER CATALOG
# 0017-0017	TTY: EDITING FEATURES, ETC.
# 0020-0037	"%" SCRATCH FILE
# 0040-0057	"$" SCRATCH FILE
# 0060-0060	/I BLOCK
# 0061-0062	EXTENDED COMMAND PROCESSOR
# 0063-0066	SYSTEM DIRECTORY
# 0067-0071	EXTENDED USER'S CATALOG
# 0072-0075	"R", "RU" COMMAND PROCESSOR
# 0076-0077	RESERVED FOR EXPANSION
# 0100-0100	SLURP LOADER BLOCK
# 0101-0102	EXTENDED HANDLER INITIALIZATION
# 0103-0112	EXTENDED HANDLER CODE
# 0113-0114	I/O HANDLER TRAP BLOCKS
# 0115-0115	BATCH RECOVERY BLOCK
# 0116-0116	ACTIVE COPY OF BATCH SAVED IN THIS BLOCK
# 0117-0117	LAST BATCH BUFFER SAVED IN THIS BLOCK
# 0120-0120	SYSTEM I/O, SWITCHES, FILES SAVED HERE
# 0121-0121	PAGE ZERO USER SETTINGS SAVED HERE
# 0122-0122	RESERVED FOR HANDLER DIRECTORY
# 0123-0162	RESERVED FOR HANDLERS
# 0163-0200	ODT SYSTEM
# 0201-0201	BATCH SYSTEM ONCE-ONLY CODE
# 0202-0202	BATCH CODE
# 0203-0215	BIN/GET/START SYSTEM
# 0216-0231	SCRATCH AREA FOR VIRTUAL LOADER
# 0232-0240	DUMP SYSTEM
# 0241-0243	CHANGE/FIND SYSTEM
# 0244-0244	FIRST BLOCK FOR DIRECTORY FILES
&writefile(0000, 0000, ".boot");
&writefile(0001, 0013, ".kbm");
&writefile(0014, 0014, ".edit");
&writefile(0015, 0016, ".cat");
&writefile(0017, 0017, ".tty");
&writefile(0020, 0037, ".percent");
&writefile(0040, 0057, ".dollar");
&writefile(0060, 0060, ".iblock");
&writefile(0061, 0062, ".ecp");
&writefile(0063, 0066, ".dir");
&writefile(0067, 0071, ".euc");
&writefile(0072, 0075, ".run");
&writefile(0076, 0077, ".reserved");
&writefile(0100, 0100, ".slurp");
&writefile(0101, 0102, ".ehinit");
&writefile(0103, 0112, ".ehcode");
&writefile(0113, 0114, ".iohtrap");
&writefile(0115, 0115, ".brecovr");
&writefile(0116, 0116, ".batch");
&writefile(0117, 0117, ".bbuffer");
&writefile(0120, 0120, ".system");
&writefile(0121, 0121, ".pzero");
&writefile(0122, 0122, ".hdir");
&writefile(0123, 0162, ".handlers");
&writefile(0163, 0200, ".odt");
&writefile(0201, 0201, ".bonce");
&writefile(0202, 0202, ".bcode");
&writefile(0203, 0215, ".binget");
&writefile(0216, 0231, ".scratch");
&writefile(0232, 0240, ".dump");
&writefile(0241, 0243, ".change");

# BUGBUG: Do the handler directory too
# BUGBUG: Do the Extended directory too

exit 0;