#February 5, 2001:  Minor changes to layout of comments
#                   Fixed a bug that caused duplicate typos in plaintext
#                   Incorporated level 8T
#   Still named reduce8.pl; older ones have descriptive names added
#January 18, 2001:  Changed line numbers to use SPAN tags.
#November 28, 2000: now turns xml entity "&ampersand;" into html "&".
#November 23, 2000: this version renders the XML entities I've changed in the
#                   massagex
#November 21, 2000: Inoutdelete now fixed.
#November 15, 2000: now reduce71.pl: I've changed the paragraph entity to
#                   "</P><P>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;", which works and
#                   indents
#November 9, 2000:  now called reduce7.pl, Nathan and I are in synch on this.
#                   All entities are Unicode where applicable.
# October 30, 2000: reduce61.pl: Inoutdelete as &#8834; ...5 (display ?? in
#                   Netscape 5).
#                   also has paragraph entity "par" which it prints as
#                   <BR> <BR>
#November 3, 2000:  illegibleword and illegibleletter also unicoded
#                   and also insertmarkmissing and insertmissing: should be
#                   all of them.
#June 2, 2000:      Nathan sends this as newReduce5.pl, fixing italics. 
#                   I modify it from the "short" form to the "letov" form, and
#                   change other displays.
#June 5, 2000:      now with code fixing dpunc too

#
# reduce*.pl - current revision of the reduction script
#

#
# Some enumerated types and related chunks of data
#

# Names for operating modes
&enum(<<'EOL');
  $geneticsMode,$plaintextMode
EOL

# Names for the possible output states
&enum(<<'EOL');
   $printingState,$nonprintingState,$tnState
EOL

# The possible flavors of deletions
&enum(<<'EOL');
   $implicitDeletion,$explicitDeletion
EOL

# Token types
&enum(<<'EOL');
   $tagToken,$printableToken,$entityToken
EOL

# Numeric identifiers for the levels...
&enum(<<'EOL');
   $lev0=0,$lev1,$lev1plus,$lev2,$lev3,$lev3plus,$lev4,$lev4plus,
   $lev5,$lev5prime,$lev5plus,$lev5plusprime,$lev5ddag,$lev5tdag,
   $lev6,$lev7,$lev7plus,$lev8,$lev8T,$lev8plus,$lev8plusT,$lev9,$lev9T,
   $lev10,$lev10T,$lev11,$lev11T,$lev12star,$lev13
EOL

# ...and the associative array for their names
%levelCodes = (
 "0",$lev0,
 "1",$lev1,
 "1+",$lev1plus,
 "2",$lev2,
 "3",$lev3,
 "3+",$lev3plus,
 "4",$lev4,
 "4+",$lev4plus,
 "5",$lev5,
 "5'",$lev5prime,
 "5+",$lev5plus,
 "5+'",$lev5plusprime,
 "5''",$lev5ddag,
 "5'''",$lev5tdag,
 "6",$lev6,
 "7",$lev7,
 "7+",$lev7plus,
 "8",$lev8,
 "8T",$lev8T,
 "8+",$lev8plus,
 "8+T",$lev8plusT,
 "9",$lev9,
 "9T",$lev9T,
 "10",$lev10,
 "10T",$lev10T,
 "11",$lev11,
 "11T",$lev11T,
 "12*",$lev12star,
 "13",$lev13
);

# Irritatingly, we need the inverse association as well.  This is a
# major workpoint.

@levelNames = (
 "0",
 "1",
 "1+",
 "2",
 "3",
 "3+",
 "4",
 "4+",
 "5",
 "5'",
 "5+",
 "5+'",
 "5''",
 "5'''",
 "6",
 "7",
 "7+",
 "8",
 "8T",
 "8+",
 "8+T",
 "9",
 "9T",
 "10",
 "10T",
 "11",
 "11T",
 "12*",
 "13"
);

#
# Variable initializations
#

##############################################################################
# Configurable parameters; control the operation of the program              #
# by changing these values.  The values for $operatingMode are               #
# $geneticsMode and $plaintextMode.                                          #
#                                                                            #
#                                                                            #
# These are default values; they can be overriden from the                   #
# command line on a PC or the options dialog on a Mac.                       #
#                                                                            #
# Usage: newReduce.pl -[gp] -m <min> -M <max> [-s] [-d] [-t] [-T]            #
#                      [-l <interval>] "input file"                          #
#                                                                            #
# -g for genetics, -p for plaintext                                          #
# <min> is the minimum level, <max> the maximum level                        #
# -s to run silently                                                         #
# -d to show double punctuation                                              #
# -t to treat DM/IM revisions as TDs                                         #
# -T to show typos                                                           #
# -l <num> to specify the line number interval (0 means no line numbers)     #
#                                                                            #
##############################################################################

$targetMinLevel=$lev0;
$targetMaxLevel=$lev13;
$operatingMode=$geneticsMode;
$runSilent=0;
$lineNumInterval=5;
$showDoublePunc=0;
$treatDMAsTD=0;
$showTypos=0;

if ($^O eq "MacOS") {
  MacPerl::LoadExternals("newReduce.rsrc");
      #MacPerl::LoadExternals("fw-XCMD");
      $configString=&Options();

      #use Mac::AppleEvents;
      #my $optionsOK;
    #$AppleEvent{'McPL','DATA'} = \&receiveOptions;
    #while (!$optionsOK) {sleep(1)}
} else {
    $configString=join(' ',@ARGV);
}

if ($configString =~ /-p/) {
    if ($configString =~ /-g/) {
     print STDERR "Conflicting arguments -p and -g\n";
     exit 1;
    }
    $operatingMode=$plaintextMode;
}

if ($configString =~ /-m (\S+)/) {
    $targetMinLevel=$levelCodes{$1};
}

if ($configString =~ /-M (\S+)/) {
    $targetMaxLevel=$levelCodes{$1};
}

if ($configString =~ /-s/) {
    $runSilent=1;
}

if ($configString =~ /-t/) {
    $treatDMAsTD=1;
}

if ($configString =~ /-T/) {
    if ($operatingMode==$geneticsMode) {
     $showTypos=1; # showTypos has no effect in plaintext
    }
}

if ($configString =~ /-d/) {
    $showDoublePunc=1;
}

if ($configString =~ /-l ([0-9]+)/) {
    $lineNumInterval=$1;
}

if ($configString =~ /"(.*)"$/) {
    $infileName=$1;
} elsif ($configString =~ /(\S+)$/) {
    $infileName=$1;
} else {
    print STDERR "Couldn't find infile name\n";
    exit 1;
}

################################
# End of configurable parameters
################################

$tagNestingDepth=0;
$overlayDepth=0;

$tokenIndex=0;
$tagIndex=0;
$tnIndex=1; # because TN 0 looks funny

$typoIsPending=0;

open(INFILE,$infileName) || die "Couldn't open input file $infileName";
if ($infileName=~/([^\.]*)\./) {
    $outfileName=$1.".html";
} else {
    $outfileName=$infileName.".html";
}
#open(OUTFILE,">$outfileName") || die "Couldn't open output file $outfileName";

print "<HTML>\n";
print "<HEAD><TITLE>";
print "Output from $infileName (";
if ($operatingMode==$geneticsMode) {
    print "genetics, minimum level $levelNames[$targetMinLevel], ";
    print "maximum level $levelNames[$targetMaxLevel])";
} else {
    print "plaintext, level $levelNames[$targetMinLevel])";
}
print "</TITLE>\n";
print "<LINK rel=\"stylesheet\" type=\"text/css\" ";
print "href=\"wake.css\" title=\"wake\"\n";
print "</HEAD>\n<BODY>\n<P>\n";

$textRoot = {
    tag => "",
    parent => 0,
    childIndex => 0,
    numChildren => 0,
    startText => "",
    selfText => "",
    endText => "",
    minLevel => $lev0,
    maxLevel => $lev13,
    modifierMaxLevel => $lev13,
    dontPrintChildren => 0,
};

$curTextObj=$textRoot;
$stackDepth=0;

# It doesn't make sense to recover plaintext for more than one
# level.
if ($operatingMode != $geneticsMode) {
    $targetMaxLevel=$targetMinLevel;
}

# main loop
while (($returnValue=getNextToken())==1) {
    if ($currentTokenType==$printableToken) {
     $curTextObj->{childList}[$curTextObj->{numChildren}++]
         =freshTextObject($currentToken);
    } elsif ($currentTokenType==$entityToken) {
     $curTextObj->{childList}[$curTextObj->{numChildren}++]
         =freshTextObject(entityToText($currentToken));
    } elsif ($currentTokenType==$tagToken) {

     if ($currentToken !~ /^\//) { # open tag
         $stackDepth++;
         $curTextObj->{childList}[$curTextObj->{numChildren}++]
          =newNestedTextUnit($currentToken);
         $curTextObj=$curTextObj->{childList}
            [$curTextObj->{numChildren}-1]; # down the stack

     } else { #close tag
         $stackDepth--;
         $curTextObj=$curTextObj->{parent}; # up the stack
     }
    } else {
     print STDERR "Tokenizer bug!  Attempting to continue...\n";
    }
}
printTextObj($curTextObj,0,$curTextObj->{numChildren},0);

# Now print the endnotes

print "</P>\n<P><B>Textual Notes</B></P>\n\n";

for ($i=1; $i<$tnIndex; $i++) {
    print "<P><A NAME=\"TN$i\"><SUP>$i</SUP></A>$tnText[$i]</P>\n\n";
}

print "\n</BODY>\n</HTML>\n";

close INFILE;
#close OUTFILE;

exit 0; # to recover Bill's DOS prompt


#
# And there ends the main flow of the program; everything from this
# point forward is a subroutine.
#

###############################
# Options event handler for Mac
###############################

sub receiveOptions {
    if ($^O ne "MacOS") {
     return;
    }

    my($event, $reply, $desc, $n) = @_;
    $desc = AEGetParamDesc($event, keyDirectObject());
    $n = AEPrint($desc);
    AEPutParam($reply, keyDirectObject(), typeChar(), 
            sprintf('%.f + %.f = %.f', $n, $n, $n+$n)
            );
    print AEPrint($event), "\n";
    print AEPrint($reply), "\n";
    AEDisposeDesc($desc);
    $optionsOK = 1;
    0;
}

######################
# Print a text object.
######################

sub printTextObj {
    my $theObj=$_[0];
    my $appendToTN=$_[1];
    my $returnValue=$_[2];
    my $unconditionalOnly=$_[3];
    my $i,$locNumChildren,$locChild;
    my $locChildIndex;
    my $locChildText,$locFirstWord;
    my $locContainedTypo,$locContainedDeletion;
    my $locMatchLength;
    
    if ($theObj->{dontPrintChildren}==1) {
     return $returnValue;
    }

    $locNumChildren=$theObj->{numChildren};

    if ($theObj->{tag} eq "TN") {
####################################################
# The following condition determines if the interval
# [$targetMinLevel,$targetMaxLevel] and the interval
# [$theObj->{minLevel},$theObj->{maxLevel}] overlap.
####################################################
     if (($targetMaxLevel >= $theObj->{minLevel})
         && ($targetMinLevel <= $theObj->{maxLevel}))
     {
         emit("<SUP><FONT SIZE=1 COLOR=\"black\"><A
HREF=\"\#TN$tnIndex\">TN$tnIndex</A></FONT></SUP>");
         # Append TN contents to the array
         for ($i=0; $i < $locNumChildren; $i++) {
          $locChild=$theObj->{childList}[$i];
          $locNumChildren=printTextObj($locChild,1,$locNumChildren,0);
         } 
         $tnIndex++;
     }
    } else {

###########
# Here too.
###########
     if (($targetMaxLevel >= $theObj->{minLevel})
         && ($targetMinLevel <= $theObj->{maxLevel}))
     {
         if ($appendToTN==1) {
          $tnText[$tnIndex].=$theObj->{startText};
          if ($locNumChildren==0) {
              $tnText[$tnIndex].=$theObj->{selfText};
          } else {
              for ($i=0; $i < $locNumChildren; $i++) {
               $locChild=$theObj->{childList}[$i];
               $locNumChildren=printTextObj($locChild,1,$locNumChildren,0);
              }
          }
          $tnText[$tnIndex].=$theObj->{endText};
         } else {
          # Fall out immediately in the peculiar case that this revision
          # is a typo and typos are being suppressed
          if (($operatingMode==$plaintextMode) || (!$showTypos)
              && ($theObj->{tag} eq "REVISION")) {
              for ($i=0; $i<$locNumChildren; $i++) {
               $locChild=$theObj->{childList}[$i];
               if ($locChild->{tag} eq "TYPO") {
                   return $returnValue;
               }
              }
          }

          if ((($operatingMode==$geneticsMode) && ($targetMinLevel <=
                                    $theObj->{modifierMaxLevel}))
              || ($theObj->{unconditionalText}==1)) {
              emit($theObj->{startText});
          }
          if ($locNumChildren==0) {
              emit($theObj->{selfText});
          } else {
              for ($i=0; $i < $locNumChildren; $i++) {
               $locChild=$theObj->{childList}[$i];
               $locNumChildren=printTextObj($locChild,0,$locNumChildren,0);
              }
          }
          if ((($operatingMode==$geneticsMode) && ($targetMinLevel <=
                                    $theObj->{modifierMaxLevel}))
              || ($theObj->{unconditionalText}==1)) {
              emit($theObj->{endText});
          }
         }
# This "else" is counterintuitive.  We print the children even if
# the parent failed the level check; this is for things like line
# numbers whose levels are not related to those of their parents.
     } else {
         for ($i=0; $i < $locNumChildren; $i++) {
          $locChild=$theObj->{childList}[$i];
          $locNumChildren=printTextObj($locChild,0,$locNumChildren,1);
         }
     }
    }
    $returnValue;
}

sub isFirstNonspaceChild {
    my $locThePtr=$_[0];
    my $locLoopVar;

    $locLoopVar=$locThePtr->{childIndex}-1;
    while ($locLoopVar >= 0) {
     $locIsSpace=&isspace($locThePtr->{parent}->{childList}[$locLoopVar]->{selfText});
     if (($locThePtr->{parent}->{childList}[$locLoopVar]->{tag} ne "")
         || ($locIsSpace==0)){
         last;
     }
     $locLoopVar--;
    }

    if ($locLoopVar < 0) {
     1;
    } else {
     0;
    }
}

#################################################################
# Climb through the hierarchy to determine the appropriate levels
# to inherit from containing objects.
#################################################################

sub stackClimb {
    my $locMinLevel=$_[0],$locMaxLevel=$_[1];
    my $locTempPtr=$curTextObj;

    my $locNothing=0;
    my $locLoopVar=0;
    my $locIsSpace=0;

    # All this code is to handle the exceptional case of deletions.
    # It walks up the hierarchy looking for a containing deletion;
    # if it finds one, it either inherits its minimum level from
    # the deletion itself, or (if the deletion is the first element
    # in a revision) from the parent of the containing revision.

    while (($locTempPtr != 0) && ($locTempPtr->{tag} ne "DELETION")) {
     $locTempPtr=$locTempPtr->{parent};
    }

    $locSavedMinLevel=$locTempPtr->{minLevel};
    $locSavedMaxLevel=$locTempPtr->{maxLevel};

    if ($locTempPtr->{tag} eq "DELETION") {
     # Some tags have no level semantics and shouldn't interfere with the
     # climbing process.  That is, in <REVISION><FIRST><DELETION>... the
     # deletion is the first nonblank child in spite of the containing <FIRST>.

     while ((($locTempPtr->{parent}->{tag} eq "TYPO")
          || ($locTempPtr->{parent}->{tag} eq "FIRST")
          || ($locTempPtr->{parent}->{tag} eq "DOUBTFUL")
          || ($locTempPtr->{parent}->{tag} eq "CORRECTION")
          || ($locTempPtr->{parent}->{tag} eq "OVERLAY"))
            && (&isFirstNonspaceChild($locTempPtr) != 0)) {
         $locTempPtr=$locTempPtr->{parent};
     }

     if ($locTempPtr->{parent}->{tag} eq "REVISION") {
         if (&isFirstNonspaceChild($locTempPtr) != 0) {
          $locMinLevel=$locTempPtr->{parent}->{parent}->{minLevel};
         } else {
          $locMinLevel=$locTempPtr->{parent}->{minLevel};
         }
     }

     # The max and min levels of the parent deletion should be the same.
     $locMaxLevel=$locSavedMinLevel;
     if ($operatingMode==$plaintextMode) {
         $locMaxLevel--;
     }

    }

    # Revisions also behave strangely.  If we're inside a revision,
    # the min level is inherited from the revision, and the max
    # level is either $lev13 or inherited from the first containing
    # deletion.

    # Here "inside" means "immediately inside" (TBR).  If there's an
    # intervening tag (<I>, <DOUBTFUL>, or whatever), we should inherit
    # the correct levels from that.

    $revPtr=$curTextObj;

    if ($revPtr->{tag} eq "REVISION") {

     $locTempPtr=$revPtr;
     while (($locTempPtr != 0)
            && ($locTempPtr->{tag} ne "DELETION")) {
         $locTempPtr=$locTempPtr->{parent};
     }         

     if ($locTempPtr->{tag} eq "DELETION") {
         $locMaxLevel=$locTempPtr->{maxLevel};
     } else {
         $locMaxLevel=$lev13;
     }
    }

    {
     min=>$locMinLevel,
     max=>$locMaxLevel,
    };
}

##############################
# Create a new textual object.
##############################

sub freshTextObject {
    my $theText=$_[0];
    my $locMinLevel;
    my $locMaxLevel;
    my $locTempPtr=$curTextObj;
    my $locDummyHash;

    # Special case: If the immediate parent is a LINENUM tag, this text is a
    # line number and is processed accordingly.

    if ($curTextObj->{tag} eq "LINENUM") {
     if ($lineNumInterval==0) {
         $theText="";
     } else {
         if ($theText =~ /([0-9]+)$/) {
          if ($1 % $lineNumInterval != 0) {
              $theText="";
          }
         }
     }
    }

    # Another special case: If the immediate parent is a DPUNC, theText
    # is left blank unless double punctuation is on.

    if ($curTextObj->{tag} eq "DPUNC") {
     if ($showDoublePunc==0) {
         $theText="";
     }
    }

    $locMinLevel=$curTextObj->{minLevel};
    $locMaxLevel=$curTextObj->{maxLevel};

    $locDummyHash=&stackClimb($locMinLevel,$locMaxLevel);
    $locMinLevel=$locDummyHash->{min};
    $locMaxLevel=$locDummyHash->{max};

    {
     tag => "",
     parent => $curTextObj,
     childIndex => $curTextObj->{numChildren},
     numChildren => 0,
     childList => [],
     startText => "",
     selfText => $theText,
     endText => "",
     unconditionalText => 0,
     minLevel => $locMinLevel,
     maxLevel => $locMaxLevel,
     modifierMaxLevel => $locMaxLevel,
     dontPrintChildren => 0,
    };
}

###########################################################
# Create a new text unit nested down a level.  The argument
# is an open tag, including any attributes.
###########################################################

sub newNestedTextUnit {
    my $theText=$_[0];
    my $theTag,$theAttrs;
    my $locStartText="",$locEndText="";
    my $locMinLevel,$locMaxLevel,$locModMaxLevel;
    my $locTypo=0,$locUnconditionalText=0;
    my $locDummyHash;
    my $locDontPrintChildren=0;

    if ($runSilent==0) {
### $runsilent is probably a command line option to disable verbose output.
### manually disable here to allow data to be printed to STDOUT instead of 
### a file.
#     print "Tag $theText\n";
    }

    if ($theText =~ /^(\S+)\s+(.+)$/) { # If there are attributes...
     $theTag=$1;
     $theAttrs=$2;
    } else {
     $theTag=$theText;
    }

    $locMinLevel=$curTextObj->{minLevel};
    $locMaxLevel=$curTextObj->{maxLevel}; # usually right

    if ($theTag eq "REVISION") {
     if ($theAttrs =~ /LEVEL="([^"]*)"/) {
         $locMinLevel=$levelCodes{$1};
      } else {
         print STDERR "Revision without a level!\n";
         return;
      }
      $locStartText=" <SUP><FONT COLOR=\"black\">[$levelNames[$locMinLevel]";
      if ($theAttrs =~ /TD="YES"/) {
         $locStartText .= "<FONT SIZE=1> TD</FONT>";
      }
      if ($theAttrs =~ /IM="YES"/) {
         if ($treatDMAsTD==0) {
            $locStartText .= "<FONT SIZE=1> IM</FONT>";
         } else {
            $locStartText .= "<FONT SIZE=1> TD</FONT>";
         }
      }
      if ($theAttrs =~ /CONTINUATION="YES"/) {
         $locStartText .= " -";
      }
      $locStartText .= "</FONT></SUP> ";
      $locEndText=" <SUP><FONT COLOR=\"black\">";
      if ($theAttrs =~ /CONTINUED="YES"/) {
         $locEndText .= "- ";
      }
      $locEndText .= $levelNames[$locMinLevel]."]</FONT></SUP> ";
      if ($curTextObj->{parent}==0) {
         $locMaxLevel=$lev13;
      }
      $locModMaxLevel=$locMinLevel; 
  } elsif ($theTag eq "DELETION") {
      # Typos need to be handled specially here
      if (($showTypos) && ($curTextObj->{tag} eq "TYPO")) {
           # Not sure what happens here
      } else {
           # For a deletion, both min and max are the min of the parent revision.
           # This assures that the start and end text will only appear
           # if the level at which the deletion took place is being
           # recovered.

           $locTempPtr=$curTextObj;
           while (($locTempPtr != 0) && ($locTempPtr->{tag} ne "REVISION")) {
              $locTempPtr=$locTempPtr->{parent};
           }

           $locMinLevel=$locTempPtr->{minLevel};
           $locMaxLevel=$locTempPtr->{minLevel};
           $locModMaxLevel=$locMaxLevel;
      }

      $locEndText="</FONT>";
      if ($theAttrs =~ /TYPE="IMPLICIT"/) {
         $locStartText="&lt;";
         $locEndText .= "&gt;";
      }
      $locStartText .= "<FONT COLOR=\"red\">";

   } elsif ($theTag eq "FIRST") {
      if ($curTextObj->{tag} ne "DELETION") {
         $prevLevel=$curTextObj->{minLevel};
      } else {
         $outerContainer=$curTextObj;
         while (($outerContainer != 0)
                && ($outerContainer->{tag} ne "REVISION")) {
            $outerContainer=$outerContainer->{parent};
         }
         if ($outerContainer != 0) {
            $prevLevel=$outerContainer->{parent}->{minLevel};
         } else {
            $prevLevel=$lev0;
         }
         $locMaxLevel=$curTextObj->{maxLevel};
         $locModMaxLevel=$locMaxLevel;
      }
      $locMinLevel=$lev0;
      if ($targetMinLevel==0) {
            $locStartText="<B>";
            $locEndText="</B>";
      } else {
            $locStartText="";
            $locEndText="";
      }
   } elsif ($theTag eq "INOUTDEL") {
      if ($operatingMode==$geneticsMode) {
            $locMinLevel=$curTextObj->{minLevel};
            $locMaxLevel=$curTextObj->{minLevel};#was maxlevel, fixed November 21, 2000
            $locModMaxLevel=$locMaxLevel;
            $locStartText="&#8834;<FONT COLOR=\"red\">"; #was {}
            $locEndText="</FONT>&#8835;";
      } else {
            $locMinLevel=$lev13+1;
            $locMaxLevel=$lev13+1;
      }

   } elsif ($theTag eq "OVERLAY") {
      if ($theAttrs =~ /DEPTH=([0-9]*)/) {
            $overlayDepth=$1;
         } else {
            print "Warning: Overlay without depth\n";
         }
      if ($overlayDepth % 2 == 1)
      {
         $overlayStart="&gt;"x$overlayDepth;
         $overlayEnd="&lt;"x$overlayDepth;
      } else {
         $overlayStart="&#187;"x($overlayDepth/2);
         $overlayEnd="&#171;"x($overlayDepth/2);
      }
      $locStartText="<SUP><FONT SIZE=1 COLOR=\"black\">".
          $overlayStart."</FONT></SUP>";
      $locEndText="<SUP><FONT SIZE=1 COLOR=\"black\">".
          $overlayEnd."</FONT></SUP>";
      $locDummyHash=&stackClimb($locMinLevel,$locMaxLevel);
      $locMinLevel=$locDummyHash->{min};
      $locMaxLevel=$locDummyHash->{max};
      $locModMaxLevel=$locMaxLevel;

      } elsif ($theTag eq "LINENUM") {
          $locUnconditionalText=1;
          $locStartText="<SPAN CLASS=\"linenum\"><FONT COLOR=\"black\">";
          $locEndText="</FONT></SPAN>";
          $locMinLevel=$lev0;
          $locMaxLevel=$lev13;
          $locModMaxLevel=$locMaxLevel;

      } elsif ($theTag eq "TYPO") {
          $locStartText = "<SUP><FONT SIZE=1> TYPO</FONT></SUP>";
          $locEndText = "<SUP><FONT SIZE=1> TYPO] </FONT></SUP>";

          if ($showTypos==0) {
           $locMinLevel=$lev13+1;
           $locMaxLevel=$lev13+1;
           $locDontPrintChildren=1;
       }

      } elsif ($theTag eq "CORRECTION") {
      # if typos are not being suppressed, set the correction to never show
          if (($operatingMode==$geneticsMode) && ($showTypos==1)) {
           $locMinLevel=$lev13+1;
           $locMaxLevel=$lev13+1;
           $locDontPrintChildren=1;
       }
      } elsif ($theTag eq "DPUNC") {
          # do nothing

      } elsif ($theTag eq "TN") {
          # just check for FIRSTLEVEL and LASTLEVEL attrs
          if ($theAttrs =~ /FIRSTLEVEL="([^"]*)"/) {
               $locMinLevel=$levelCodes{$1};
            }
            if ($theAttrs =~ /LASTLEVEL="([^"]*)"/) {
             $locMaxLevel=$levelCodes{$1};
                $locModMaxLevel=$locMaxLevel;
            }

      } elsif ($theTag eq "DOUBTFUL") {
          $locUnconditionalText=1;
          $locStartText="<SUP>?</SUP>";
          $locEndText="<SUP>?</SUP>";
          $locDummyHash=&stackClimb($locMinLevel,$locMaxLevel);
          $locMinLevel=$locDummyHash->{min};
          $locMaxLevel=$locDummyHash->{max};
          $locModMaxLevel=$locMaxLevel;

      } else { # Assume it's an HTML tag
          $locUnconditionalText=1;
          $locStartText="<".$theText.">";
          $locEndText="</".$theTag.">";
          $locDummyHash=&stackClimb($locMinLevel,$locMaxLevel);
          $locMinLevel=$locDummyHash->{min};
          $locMaxLevel=$locDummyHash->{max};
          $locModMaxLevel=$locMaxLevel;
      }

      {
          tag => $theTag,
          parent => $curTextObj,
          childIndex => $curTextObj->{numChildren},
          numChildren => 0,
          childList => [],
          startText => $locStartText,
          selfText => "",
          endText => $locEndText,
          unconditionalText => $locUnconditionalText,
          minLevel => $locMinLevel,
          maxLevel => $locMaxLevel,
          modifierMaxLevel => $locModMaxLevel,
          dontPrintChildren => $locDontPrintChildren,
      };
}

#######################################
# Routine to convert an entity to text.
#######################################

sub entityToText {
    my $theEntity=$_[0];
    my $locMinLevel,$locMaxLevel,$locDummyHash;

    $locMinLevel=$curTextObj->{minLevel};
    $locMaxLevel=$curTextObj->{maxLevel};

    $locDummyHash=&stackClimb($locMinLevel,$locMaxLevel);
    $locMinLevel=$locDummyHash->{min};
    $locMaxLevel=$locDummyHash->{max};

    if ($theEntity eq "missinginsertmark") {
     if (($operatingMode==$geneticsMode)
            && (($targetMinLevel <= $locMinLevel)
          && ($targetMaxLevel >= $locMinLevel))) {
         "<FONT SIZE=-2><SUB>&#9674;</SUB></FONT>";
     } else {
         "";
     }
    } elsif ($theEntity eq "missinginsert") {
     if (($operatingMode==$geneticsMode)
            && (($targetMinLevel <= $locMinLevel)
          && ($targetMaxLevel >= $locMinLevel))) {
         "<FONT SIZE=-5><SUB>&#916;</SUB></FONT>";
     } else {
         "";
     }
    } elsif ($theEntity eq "spacereserved") {
     if (($operatingMode==$geneticsMode) 
            && (($targetMinLevel <= $locMinLevel)
          && ($targetMaxLevel >= $locMinLevel))) {
         "&Oslash;";
     } else {
         "";
     }
    } elsif ($theEntity eq "os1") {
     if (($operatingMode==$geneticsMode) 
            && (($targetMinLevel <= $locMinLevel)
          && ($targetMaxLevel >= $locMinLevel))) {
         "<SUP><Font Size=-5 color=\"black\">A</Font><font
color=\"black\">&gt;</font></SUP>";
     } else {
         "";
     }
    } elsif ($theEntity eq "os2") {
     if (($operatingMode==$geneticsMode) 
            && (($targetMinLevel <= $locMinLevel)
          && ($targetMaxLevel >= $locMinLevel))) {
         "<SUP><Font Size=-5 color=\"black\">B</Font><font
color=\"black\">&gt;</font></SUP>";
     } else {
         "";
     }
    } elsif ($theEntity eq "os3") {
     if (($operatingMode==$geneticsMode) 
            && (($targetMinLevel <= $locMinLevel)
          && ($targetMaxLevel >= $locMinLevel))) {
         "<SUP><Font Size=-5 color=\"black\">C</Font><font
color=\"black\">&gt;</font></SUP>";
     } else {
         "";
     }
    } elsif ($theEntity eq "os4") {
     if (($operatingMode==$geneticsMode) 
            && (($targetMinLevel <= $locMinLevel)
          && ($targetMaxLevel >= $locMinLevel))) {
         "<SUP><FONT SIZE=-5 color=\"black\">D</FONT><font
color=\"black\">&gt;</font></SUP>";
     } else {
         "";
     }
    } elsif ($theEntity eq "os5") {
     if (($operatingMode==$geneticsMode) 
            && (($targetMinLevel <= $locMinLevel)
          && ($targetMaxLevel >= $locMinLevel))) {
         "<SUP><FONT SIZE=-5 color=\"black\">E</FONT><font
color=\"black\">&gt;</font></SUP>";
     } else {
         "";
     }
    } elsif ($theEntity eq "os6") {
     if (($operatingMode==$geneticsMode) 
            && (($targetMinLevel <= $locMinLevel)
          && ($targetMaxLevel >= $locMinLevel))) {
         "<SUP><FONT SIZE=-5 color=\"black\">F</FONT><font
color=\"black\">&gt;</font></SUP>";
     } else {
         "";
     }
    } elsif ($theEntity eq "oe1" ) {
     if (($operatingMode==$geneticsMode) 
            && (($targetMinLevel <= $locMinLevel)
          && ($targetMaxLevel >= $locMinLevel))) {
         "<SUP><font color=\"black\">&lt;</font><FONT SIZE=-5
color=\"black\">A</FONT></SUP>";
     } else {
         "";
     }
    } elsif ($theEntity eq "oe2" ) {
     if (($operatingMode==$geneticsMode) 
            && (($targetMinLevel <= $locMinLevel)
          && ($targetMaxLevel >= $locMinLevel))) {
         "<SUP><font color=\"black\">&lt;</font><FONT SIZE=-5
color=\"black\">B</FONT></SUP>";
     } else {
         "";
     }
    } elsif ($theEntity eq "oe3" ) {
     if (($operatingMode==$geneticsMode) 
            && (($targetMinLevel <= $locMinLevel)
          && ($targetMaxLevel >= $locMinLevel))) {
         "<SUP><font color=\"black\">&lt;</font><FONT SIZE=-5
color=\"black\">C</FONT></SUP>";
     } else {
         "";
     }
    } elsif ($theEntity eq "oe4" ) {
     if (($operatingMode==$geneticsMode) 
            && (($targetMinLevel <= $locMinLevel)
          && ($targetMaxLevel >= $locMinLevel))) {
         "<SUP><font color=\"black\">&lt;</font><FONT SIZE=-5
color=\"black\">D</FONT></SUP>";
     } else {
         "";
     }
    } elsif ($theEntity eq "oe5" ) {
     if (($operatingMode==$geneticsMode) 
            && (($targetMinLevel <= $locMinLevel)
          && ($targetMaxLevel >= $locMinLevel))) {
         "<SUP><font color=\"black\">&lt;</font><FONT SIZE=-5
color=\"black\">E</FONT></SUP>";
     } else {
         "";
     }
    } elsif ($theEntity eq "oe6" ) {
     if (($operatingMode==$geneticsMode) 
            && (($targetMinLevel <= $locMinLevel)
          && ($targetMaxLevel >= $locMinLevel))) {
         "<SUP><font color=\"black\">&lt;</font><FONT SIZE=-5
color=\"black\">F</FONT></SUP>";
     } else {
         "";
     }
    } elsif ($theEntity eq "illegibleword" ) {
     "<Font Size=+1>&#25A1;</Font>";
    } elsif ($theEntity eq "illegibleletter" ) {
     "&#25AB;";
    } elsif ($theEntity eq "par" ) {
     "</P><P>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;";
    } elsif ($theEntity eq "emdash" ) {
     "&#8212;";
    } elsif ($theEntity eq "ddagger" ) {
     "&#0135;";
    } elsif ($theEntity eq "a_um" ) {
     "&#0228;";

    } elsif ($theEntity eq "e_acute" ) {
     "&eacute;";
    } elsif ($theEntity eq "e_grave" ) {
     "&egrave;";
    } elsif ($theEntity eq "e_circ" ) {
     "&ecirc;";
    } elsif ($theEntity eq "ae_lig" ) {
     "&aelig;";
    } elsif ($theEntity eq "op_smquo" ) {
     "&#0147;";
    } elsif ($theEntity eq "cl_smquo" ) {
     "&#0148;";
    } elsif ($theEntity eq "sect" ) {
     "&#167;";
    } elsif ($theEntity eq "l_sqbr" ) {
     "&#091;";
    } elsif ($theEntity eq "r_sqbr" ) {
     "&#093;";
    } elsif ($theEntity eq "l_T_sig" ) {
     "<Font Face=Wake>K</Font>";
    } elsif ($theEntity eq "r_T_sig" ) {
     "<Font Face=Wake>J</Font>";
    } elsif ($theEntity eq "d_T_sig" ) {
     "<Font Face=Wake>I</Font>";
    } elsif ($theEntity eq "u_T_sig" ) {
     "<Font Face=Wake>T</Font>";
   } elsif ($theEntity eq "ampersand" ) {
     "&amp;";
    } else {
     # Other non-HTML entities are unsupported for now
     "&".$theEntity.";";
    }
}

############
# Tokenizer.
############

sub getNextToken {

    if (length $tokenStream==0) { # if there's no text buffered...
     $tokenStream=<INFILE>;
     if (!$tokenStream) {
         return 0; # EOF
     }
     #chop $tokenStream;
    }
    # Now we have some text to process; match a token.

    # These are usually right
    $currentTokenType=$printableToken;
    $currentTokenAttrs="";

    if ($tokenStream =~ /^\n*$/) {
     $currentToken="\n";
     $tokenStream=<INFILE>;
     while ($tokenStream =~ /^$/) {
         $currentToken .= "\n";
         $tokenStream=<INFILE>;
         if (!$tokenStream) {
          return 0; # EOF
         }
     }
     # Set up new stream to be used after this long elsif...
     $newStream=$tokenStream;
    } elsif ($tokenStream =~ /^\s+/) {
     $currentToken=" ";
     $newStream=$';
# Whitespace does not terminate a text token; big savings in memory usage!
    } elsif ($tokenStream =~ /^[^<&]+/){
     $currentToken=$&;
     $newStream=$';
    } elsif ($tokenStream =~ /^&([^;]*);/) { # entity
     $currentToken=$1;
     $currentTokenType=$entityToken;
     $newStream=$';
    } elsif ($tokenStream =~ /^<([^>]*)>/) { # tag
     $currentToken=$1;
     $currentToken =~ tr/a-z/A-Z/;
     $currentTokenType=$tagToken;
     $newStream=$';
    } elsif ($tokenStream =~ /^<([^>]*)$/) { # partial tag
     $currentToken="$1 ";
     $currentTokenType=$tagToken;
     while ($tokenStream=<INFILE>) {
         # chop $tokenStream;
         if ($tokenStream =~ /^([^>]*)>/) {
          $currentToken .= $1;
          $tokenStream = $';
          last;
         } else {
          $currentToken .= "$tokenStream ";
         }
         $currentToken =~ tr/a-z/A-Z/;
         $newStream=$';
     }
     return 1;
    } else {
     print STDERR "Parser failed!\n";
     print STDERR "Next tokens: ($tokenStream)\n";
     return 0;
    }
    $tokenStream=$newStream;
    return 1;
}

##################################################################
# Printing subroutine.
##################################################################

sub emit {
    local $theText=$_[0];

    # Handle carriage returns.  If the token starts with a carriage return, and
    # we haven't printed anything since the last CR token, it is not printed.

    if (($theText =~ /^\n/) && ($outputSinceLastCR==0)){
     return;
    }

    if ($theText=~/^\n\n+$/) {
     print "</P>\n\n<P>";
    } else {
     print $theText;
    }
    if ($theText=~/\n$/) {
     $outputSinceLastCR=0;
    } else {
     $outputSinceLastCR=1;
    }
}

#################
# isspace, ‡ la C
#################

sub isspace {
    local $theText=$_[0];

    my $c;
    my @chars=split(//,$theText);

    foreach $c (@chars) {
     if (($c ne " ") && ($c ne "\n")) {
         return 0;
     }
    }
    1;
}

#####################################
# enum subroutine from the Camel Book
#####################################

sub enum {
    local($_)=@_;
    local(@specs)=split(/,/);
    local($val);
    for (@specs) {
     if (/=/) {
         $val = eval $_;
     } else {
         eval $_ . ' = ++$val';
     }
    }
}