#!/usr/bin/perl # # Copyright © 2015-2025 by Vincent Slyngstad # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS LISTED ABOVE BE LIABLE # FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # # Except as contained in this notice, the names of the authors above # shall not be used in advertising or otherwise to promote the sale, use # or other dealings in this Software without prior written authorization # from those authors. # # Explode the contents of a DIAL file-system into the # various files it contains. # BUGBUG: We aren't yet writing XML instructions to # facilitate reconstructing the media image. # # Not much is researched yet about dates in DIAL. There's # no evidence that there was a notion of system or file # dates, AFAIK. # Probably, this date stuff is just cruft. ($_, $_, $_, $dy, $mo, $i) = localtime(time); $i = ($i + 1900) - 1960; $epoch = int($i/8); $cyear = $i & 07; $i += 60; @month = ("0", "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC", "13", "14", "15"); #print " $dy-$month[$mo+1]-$i\n\n"; # # Open the disk image to be examined. Media specific tools are # used to convert media into simple 12 bit words, zero extented # to 16 bits, and stored in two bytes in Perl's packed 'S' format. # Consecutive words form blocks, and blocks are stored in logical # order, not the physical order of the original media. open(INPUT, $ARGV[0]) || die "$ARGV[0]: $!"; binmode(INPUT); # # Check for OS/8 or COS, and bail if this doesn't appear to be # a DIAL image. # Read first OS/8 directory segment $link = 001; $fsbase = 0; $bsize = 2*0400; seek(INPUT, $bsize*($fsbase+$link), 0) || die "seek($dsk): $!"; read(INPUT, $buf, $bsize) || die "read($dsk): $!"; @blk = unpack("S512", $buf); $nent = shift @blk; $sblk = shift @blk; # If it has files in the system area, it's not a system device. $sys = 0 if $sblk < 070; # Not a system image $cos = 0 if $sblk < 0140; # Not a COS image $nent = 010000 - $nent; $link = shift @blk; $tent = shift @blk; $aiw = shift @blk; $aiw = 010000 - $aiw unless $aiw == 0; die "$ARGV[0] looks like OS/8\n" if $sblk == 0007; die "$ARGV[0] looks like OS/8\n" if $sblk == 0070; die "$ARGV[0] looks like COS\n" if $sblk == 0140; # # Make sure the output directory exists. $fsname = $ARGV[0]; $fsname =~ s/[.]dsk$//; $fsname .= ".0"; mkdir $fsname unless -d $fsname; # # A DIAL tape volume has this basic format: # # Block Function # ----- -------- # 000-267 File Storage # 277 Free # 300-367 DIAL # 370-467 Working Area # 470-777 File Storage # # The DIAL area breaks down thus: # 300-321 EDITOR # 322-345 ASSEMBLER # 346-347 DIAL Index # 350-353 FILECOMS # 354-355 LOADER # 356-360 SAVE BINARY # 361-362 PX, DX # 363 TTY # 364 PS # 365 UNUSED # 367 SCRATCH # # TODO: Research disk layout for DF32/RS08/RK08, and # save them with appropriate names. # # For now, we just carve the media into 1000 block units and # dump each one. # $bsize = 0400 * 2; $vsize = -s $ARGV[0]; $vsize /= $bsize; # In blocks $parts = $vsize / 1000; for ($unit = 0; $unit <= $parts; $unit++) { $voffset = $unit * 1000; # Read the index. &readindex; #printf "\n$ARGV[0]\n"; #printf "%s Name\tTBLK\tTLEN\tBBLK\tBLEN\n", $lap6? "LAP6": "DIAL"; #printf "---------\t----\t----\t----\t----\n"; while (@dir) { # # Shift the next directory entry. $name1 = shift @dir; $name2 = shift @dir; $name3 = shift @dir; $name4 = shift @dir; $stxt = shift @dir; $ctxt = shift @dir; $sbin = shift @dir; $cbin = shift @dir; if ($lap6) { $quarter = $sbin >> 9; $sbintrim = $sbin & 0777; } else { $quarter = 'd'; $sbintrim = $sbin; } # # Pack up the file name. File names depend on @sixbit. @name = ($name1>>6, $name1&077, $name2>>6, $name2&077, $name3>>6, $name3&077, $name4>>6, $name4&077); grep($_ = $sixbit[$_], @name); $name = join("", @name); $name =~ y/A-Z/a-z/; # Don't shout $name =~ s/[?]*$//; # Remove trailing "?" $name =~ s/[\032]*$//; # Remove trailing EOF next if $name eq '////////'; # Empty next if ($lap6 == 2) && $name =~ /^\//; $name =~ s:^//:..:; # Deleted file, write as hidden # # Process text and binary versions, as found. if ($stxt != 05757) { &writetxt($stxt, $stxt+$ctxt-1, "$name.tx"); if ($stxt > $dirblk) { $distance{"$name.tx"} = $stxt - $dirblk; } else { $distance{"$name.tx"} = $dirblk - ($stxt+$ctxt-1); } $block{"$name.tx"} = $sbintrim } # BUGBUG: These are currently written as one word per short, # which is readable but not efficient. Could they reasonably # be converted to .sv format or something similar? if ($cbin != 05757) { $cbin &= 0777; &writefile($sbintrim, $sbintrim+$cbin-1, "$name.b$quarter"); # Remember the file and how far from the index it was. if ($sbintrim > $dirblk) { $distance{"$name.b$quarter"} = $sbintrim - $dirblk; } else { $distance{"$name.b$quarter"} = $dirblk - ($sbintrim+$cbin-1); } $block{"$name.b$quarter"} = $sbintrim } # # # # Print the directory. # $name =~ y/a-z/A-Z/; # printf "%-8s\t", $name; # if ($stxt == 05757) { # printf " \t \t"; # } else { # printf "%4o\t%4o\t", $stxt, $ctxt; # } # if ($cbin == 05757) { # printf " \t \n"; # } else { # printf "%4o\t%4o\n", $sbin, $cbin; # } # printf "%-8s\t0%04o\t0%04o\n", "${name}.tx", $stxt, $ctxt # unless $stxt == 05757; # printf "%-8s\t0%04o\t0%04o\n", "${name}.bd", $sbintrim, $cbin # unless $cbin == 05757; } # # Now we have the information we need to write the .order file. # Sort by distance. If identical sort by tape block to have defined order sub bydistance { $distance{$a} <=> $distance{$b} || $block{$a} <=> $block{$b} } open(OUTPUT, ">$fsname/.order") || die "$fsname/.order: $_"; foreach $f (sort bydistance keys %distance) { print OUTPUT "$f\n"; } } # # Read the specified block, taking into account $boff and $bsize. sub getblk { local($blk) = @_; seek(INPUT, ($blk+$voffset) * $bsize, 0) || die "blk seek: $!"; read(INPUT, $buf, $bsize) || die "blk read: $!"; return unpack("S*", $buf); } # # DIAL binary files have an extra header block prepended, which # looks like this: # A word with the mode of the binary. # A word with the field of the start address. # A word with unknown data. # A word with the starting address. # 256-32-5 = 219 filler words. # The length of the binary, in blocks. # A 32 word array, non-zero if the corresponding pair of pages is to # be loaded from the next block. # # This is followed by up to 32 additional blocks which are actually loaded, # in sequential order. # # # Here we go: # Actual text is packed in sixbit, with substitutions. sub writetxt { local($start, $end, $name) = @_; #warn "writetxt($start, $end, $name)\n"; @text = (); while ($start <= $end) { # Read in the text. seek(INPUT, $start*$bsize, 0) || die "$ARGV[0] file seek: $!"; # Read the media. If that fails, it is usually because SIMH didn't # bother to write the uninitialized block. Fabricate the empty blocks. read(INPUT, $buf, $bsize) || ($buf = pack("S$bsize", 0)); push(@text, unpack("S*", $buf)); $start++; } # Open a file for our results. open(OUTPUT, ">$fsname/$name") || die "$fsname/$name: $!"; binmode(OUTPUT); if (@text >= 2 && $lap6) { $word1 = shift @text; $word2 = shift @text; if ($word1 != 02065 || $word2 != 05712) { print "Invalid manuscript header on $name: $word1 $word2\n"; unshift(@text, $word2); unshift(@text, $word1); } } # # We've got ourselves a text file. Unpack the beast. while (@text) { $word = shift @text; $c1 = $sixbit[$word >> 6]; last if $c1 eq "\032"; # EOF print OUTPUT $c1; $c2 = $sixbit[$word & 077]; last if $c2 eq "\032"; # EOF print OUTPUT $c2; } } sub writefile { local($first, $last, $name) = @_; # Open a file for our results. open(OUTPUT, ">$fsname/$name") || die "$fsname/$name: $!"; #warn "writefile($first, $last, '$name')\n"; binmode(OUTPUT); # Copy out the file. These are binary, so leave the words in DSK format. for (; $first <= $last; $first++) { seek(INPUT, $first*$bsize, 0) || die "$ARGV[0] file seek: $!"; # read(INPUT, $buf, $bsize) || die "$fsname/$name file read: $!"; read(INPUT, $buf, $bsize) || ($buf = pack("S$bsize", 0)); # Write the file. print OUTPUT $buf || die "file write: $!"; } } # # The DIAL area breaks down thus: # 300-321 EDITOR # 322-345 ASSEMBLER # 346-347 DIAL Index # 350-353 FILECOMS # 354-355 LOADER # 356-360 SAVE BINARY # 361-362 PX, DX # 363 TTY # 364 PS # 365 UNUSED # 366 BUGBUG: What's this? # 367 SCRATCH # if ($lap6) { # Infer the location of .lap6 from $dirblk. $flap6 = $dirblk - 0126; # .lap6 is always at a multiple of 0100. $flap6 &= ~077; &writefile($flap6, $flap6+047, ".lap6"); # The current manuscript lives between .lap6 # and $dirblk. &writetxt($flap6+050, $dirblk-1, ".manuscript"); # And of course the Index. &writefile($dirblk, $dirblk+1, ".index"); # Deduce the extent of the lower and upper # unused areas, if any. We read a block of the FILECOMS # section of .flap6, extracting the relevant boundaries # from the binary. (One is stored complemented.) @fcoms = &getblk($flap6+021); $lu = $fcoms[0166]; $lu = $flap6 if $lu > $flap6; $uf = 07777 & ~$fcoms[0154]; $uf = $dirblk+2 if $uf > 01400; #printf STDERR "lu=%o uf=%o\n", $lu, $uf; # Save the content of the lower and upper # "unused" areas, if any. &writefile($lu, $flap6-1, '.unused1'); &writefile($dirblk+2, $uf-1, '.unused2'); # # Note the beginning block numbers as the configuration. open(OUTPUT, ">$fsname/.config") || die "$fsname/.config: $_"; printf OUTPUT "%04o %04o %04o %04o %04o %04o %04o\n", 0, $lu, $flap6, $flap6+050, $dirblk, $dirblk+2, $uf; } else { # BUGBUG: TODO: Check that these locations are constant. &writefile(0300, 0321, ".edit"); &writefile(0322, 0345, ".asmblr"); &writefile(0346, 0347, ".index"); &writefile(0350, 0353, ".filecoms"); &writefile(0354, 0355, ".loader"); &writefile(0356, 0360, ".svbinary"); &writefile(0361, 0362, ".pxdx"); &writefile(0363, 0363, ".tty"); &writefile(0364, 0364, ".ps"); &writefile(0365, 0366, ".unused"); &writefile(0367, 0367, ".scratch"); } # # LAP6 documentation states that the index may be at any block # number ending in 6, and continues into the block whose number # ends in 7. Im practice, LAP6 distributions provided a standard # configuration and ten alternate configurations. Those are the # ones we check for: # 0426 LAP6 Standard Configuration # 0426 LAP6 Config 1 (Unit 1 boot) # 0426 LAP6 Config 2 (No working area) # 0136 LAP6 Config 3 (No lower File Area, Unused after Index) # 0426 LAP6 Config 4 (Unused near Index) # 0426 LAP6 Config 5 (Unused near Index, Unit 1 boot) # 0326 LAP6 Config 6 (Unused between lower File Area and Index) # 0226 LAP6 Config 7 (No lower File Area, Unused at 0-77) # 0136 LAP6 Config 8 (No lower File Area, No Unused area) # 0136 LAP6 Config 9 (No lower File Area, Unused after Index) # 0426 LAP6 Config 10 (No lower File Area, Unused at 0-277) # We also check for a couple of DIAL index locations seen in the field. # # Side Effects: # We set the globals "$lap6" and "@sixbit" to match what # OS we think we've found. The global "@dir" is set to the # actual index content. # # Notes: # The WorkSpace is always right before the Index, and LAP6 # is always right before that. Apparently, the WS can be 056 # blocks or 066 blocks. LAP6 starts at 0000, 0100, 0200, or 0300. # Is it acceptable to absorb Unused space into the upper or lower # file regions? Unused areas always follow the Lower File area # or precede the Upper File area. (The latter is detectable.) # Sizes for L. unused areas are 0010, 0300, 0040, 0100, or 0300. # Sizes for U. unused areas are 0040, or 0100. # TODO: # We don't differentiate configurations with their Index # in the same place. We'll need to do that to implement # the explode function well. sub readindex { $lap6{0346} = 0; # DIAL $lap6{0356} = 0; # DIAL $lap6{0426} = 1; # LAP6 $lap6{0136} = 1; # LAP6 $lap6{0326} = 1; # LAP6 $lap6{0226} = 1; # LAP6 $lap6{0476} = 1; # LAP6 $lap6{0376} = 2; # LAP6 # lasner, nagel $lap6{0476} = 1; # LAP6 # eeg1 foreach $k (keys %lap6) { $dirblk = $k; $lap6 = $lap6{$dirblk}; @dir = &getblk($dirblk); next if ($dir[0] != 05757) ; #|| ($dir[1] != 05757) if ($lap6 == 2) { # next if $dir[1] != 0; } else { next if ($dir[1] != 05757) || ($dir[2] != 05757) || ($dir[3] != 05757); } #warn sprintf "Index found in block %04o\n", $dirblk; if ($lap6) { @sixbit = ( "0", "1", "2", "3", "4", "5", "6", "7", # 00x "8", "9", "\n","\033"," ", "i", "p", "-", # 01x "+", "|", "#", "\031","A", "B", "C", "D", # 02x "E", "F", "G", "H", "I", "J", "K", "L", # 03x "M", "N", "O", "P", "Q", "R", "S", "T", # 04x "U", "V", "W", "X", "Y", "Z", "\r","/", # 05x "?", "=", "u", ",", ".", "\$","[", "_", # 06x '"', "'", "<", ">", "]", "*", ":", "\032",# 07x ); } else { @sixbit = ( "\032","A", "B", "C", "D", "E", "F", "G", # 00x "H", "I", "J", "K", "L", "M", "N", "O", # 01x "P", "Q", "R", "S", "T", "U", "V", "W", # 02x "X", "Y", "Z", "[", "\\","]", "^", "\r", # 03x " ", "!", '"', "\n","\$","%", "?", "\t", # 04x "(", ")", "*", "+", ",", "-", ".", "/", # 05x "0", "1", "2", "3", "4", "5", "6", "7", # 06x "8", "9", ":", ";", "<", "=", ">", "?", # 07x ); } push(@dir, &getblk($dirblk+1)); return; } die "$ARGV[0]: No DIAL or LAP6 directory found.\n"; } exit 0;