#!/usr/bin/perl 
use warnings;

#
# COMMENTS
#
# This script takes class lists downloaded from DuckWeb and converts
# them into "classlist.lst" files suitable for uploading to WebWork.
#
# Students' logins will be their DuckID (the part of their UO email
# address to the left of the "@").  Their initial passwords will be
# their ID numbers.  For students who have requested that their email
# addresses not be included in the classlist, both their login ID and
# their password will be their ID numbers.
#
# This program makes a terrible number of assumptions about its input.
# The ones I can think of are:
#
# The input is of the form that DuckWeb supplies under "download
# classlist" in Fall 2008, for a course without discussion sections.
#
# The name field contains exactly one comma, used to
# separate the last name from the rest of the name.
#
# The table containing student data is the only table with an entry
# called "Student name".  That table contains a line for each student
# with 7 entries: name, id number, unused by webwork, unused by
# webwork, unused by webwork, email, comment. (ACTUALLY IT IS WORSE
# THAN THIS.  THERE IS AN ADDITIONAL FIELD, SOMETIMES, FOR GRADES.
# ALSO UNUSED BY WEBWORK)
#
# In courses without discussion sections, the comment should end up
# being ignored.  In courses with discussion sections, the comment
# should give the CRN of the section number, and then be put into the
# field to get read into the section variable of WebWork.
#
# in each entry of student data used by webwork, the data is the only
# data between a ">" and a "<", except for the email entry, and then
# it is the datum enclosed by the second pair of ">" and "<".
#
# that the input filename has a period in it and the part of the name
# to the left of the period isn't empty.
#
# If the CRN is in the 16th row (!) of the input file, then the output
# file will be the CRN with ".lst" appended.  Otherwise it will be
# named "temp.lst".
#
# For each student the input file consists of 8 fields, each on a line of 
# html code.
#
# field 1:  name in form: last, first (possibly with a middle initial)
# field 2:  ID number
# field 3:  class status (e.g. FR, SO, JU, SR)  (ignored)
# field 4:  major (ignored)
# (field 5:  grade in class (ignored) SOMETIMES!)
# field 6:  grading option (P or G) (ignored)
# field 7:  email address
# field 8:  section of multisection class
#
# output file is a nine field CSV of form
#
# ID, lastname, firstname (with MI), C, ,section, ,email, duckid
#
#
# USAGE: 
# save as html-classlist-duckid
#
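# 1) Download the classlist from DuckWeb.  It will end up in a file
# named hwskclst.P_ClassList.  The script reads its input from
# ~/Desktop/hwskclst.p_ClassList (note the lowercase "p"), so make
# sure the file is saved there.
#
# 2) At the terminal window, run the command
#
# perl html-classlist-duckid hwskclst.P_ClassList
#
# (If you make the file html-classlist-duckid executable, you can skip
# the "perl" in the above.)  Note that the filename argument is not
# currently read by the script; the input path is fixed as described
# in step 1.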
#
# This will make a new file on your Desktop, named either the CRN with
# ".lst" added, or temp.lst.  In either case, it should now be in
# the right format for WebWork to understand.
#
# 3) Log into your course on WebWork.  Go to Instructor Tools -> File
# Manager.  Upload the file you just produced (<CRN>.lst or temp.lst).
#
# 4) Go to Instructor Tools -> Classlist Editor.  Click the "Import
# users from file..." option and make sure that the file you uploaded
# is selected in the little menu.
#
# 5) Take Action!
#
# You should now be done, and you should see the students in the classlist.
#
# You can do this repeatedly as registration changes.  No user should
# get added twice, but new users should get added (old ones won't get
# deleted if they drop though).


# ---------------------------------------------------------------------
# Parser configuration and state shared with the main loop below.
# ---------------------------------------------------------------------

# Text used to find where the table containing student data begins.
my $pattern = "Student name";

# Text used to find where the table containing student data ends.
my $endpattern = "TABLE";

# Set to 1 while inside the table containing student data.
my $mode = 0;

# Sometimes the classlist has a grades column; this text is used to
# check for that.
my $gradepattern = "Grade";

# 0 if there is no grade column, set to 1 if there is one.
my $gradecol = 0;

# Set to 1 while in the record for a single student.
my $inrecord = 0;

# Sign that a record for a student is beginning (if $mode == 1).
my $beginrecord = "<TR>";

# Holds the student name as "Lastname, First M".
my $name = "blank";

# Holds the student ID number.
my $id = "blank";

# Holds the student email address.
my $email = "blank";

# Holds the email address split at "@"; element 0 is the non-domain
# part, which is written out as the login.
my @fields = split /@/, $email;

# Signals when to open the output file - we don't want to do it until
# we've tried to capture the CRN.
my $outfile_open = 0;

# Current line of the input file.
my $line = " ";

# CRN of the section the student is in, for courses with discussion
# sections.
my $section = " ";

# Keeps track of which line of the file we are on.  Used for trying to
# find the CRN.
my $linecounter = 0;

# The input path is fixed; command-line arguments are not consulted.
my $FilePath = "$ENV{HOME}/Desktop/hwskclst.p_ClassList";

# Three-argument open with an explicit failure check, so a missing or
# unreadable classlist stops the script with a clear message instead
# of silently producing nothing.  The bareword handle FH is kept
# because the main loop reads from it by that name.
open(FH, '<', $FilePath)
    or die "Cannot open input file '$FilePath': $!\n";

# Default output filename; replaced by "<CRN>.lst" if a CRN is found.
my $outfile = join('.', "temp", "lst");

# 
# pattern marks beginning of list of students.  
# mode becomes 1 when the list of students is about to begin, mode is 0
# before that point.

# inrecord is 1 while parsing a student record, 0 while not doing that.

# Main pass over the DuckWeb HTML.
#
# Line 16 of the input is inspected for a CRN (digits between ">" and
# "<"); if one is found the output file is named "<CRN>.lst", otherwise
# the default name in $outfile is used.  Each "<TR>" seen inside the
# student table starts a record whose cells follow one per line:
# name, id, two ignored cells, an optional grade cell, one more ignored
# cell, email, section.  One CSV line is written per record.

# Lexical output handle; undefined until the output file is opened.
my $out_fh;

# Opens the output file exactly once.  Normally triggered at line 16,
# but also called right before the first record is written so that no
# record is ever printed to an unopened handle (e.g. when the input is
# shorter than 16 lines or the table starts early).
my $open_output = sub {
    return if defined $out_fh;
    my $out_path = "$ENV{HOME}/Desktop/$outfile";
    open($out_fh, '>', $out_path)
        or die "Cannot open output file '$out_path' for writing: $!\n";
    return;
};

while (defined($line = <FH>)) {
    # Only lines read here are counted; cell lines consumed inside a
    # record below do not advance $linecounter.
    $linecounter++;

    if ($linecounter == 16) {
        if ($line =~ /\>(\d+)\</) {
            $outfile = join('.', $1, "lst");
        }
        $outfile_open = 1;
    }
    if ($outfile_open == 1) {
        $open_output->();
        $outfile_open = 0;
    }

    # Track whether we are inside the student table, whether it has a
    # grade column, and whether this line starts a student record.
    if ($line =~ /$pattern/) {
        $mode = 1;
    }
    if ($mode == 1 and $line =~ /$gradepattern/) {
        $gradecol = 1;
    }
    if ($line =~ /$endpattern/) {
        $mode = 0;
    }
    if ($mode == 1 and $line =~ /$beginrecord/) {
        $inrecord = 1;
    }

    if ($mode == 1 and $inrecord == 1) {
        # Read every cell line of the record up front so that a
        # truncated file is detected instead of matching against undef.
        my $cells_needed = ($gradecol == 1) ? 8 : 7;
        my @cells;
        for my $n (1 .. $cells_needed) {
            my $cell = <FH>;
            last unless defined $cell;
            push @cells, $cell;
        }
        if (scalar(@cells) < $cells_needed) {
            warn "Input ended in the middle of a student record "
               . "(near input line $.); skipping the incomplete record\n";
            last;
        }

        # Reset per-record values so a cell that fails to match cannot
        # inherit the previous student's data.
        $name    = "blank";
        $id      = "blank";
        $section = " ";

        if ($cells[0] =~ /\>(.*?)\</) {
            $name = $1;
        }
        if ($cells[1] =~ /\>(.*?)\</) {
            $id = $1;
        }

        # The email and section cells are always the last two, whether
        # or not the optional grade cell is present.  The address is
        # the second ">...<" datum of the email cell.
        my @email_parts = ($cells[-2] =~ /\>(.*?)\</g);
        if (scalar(@email_parts) > 1 and $email_parts[1] =~ /\S/) {
            $email  = $email_parts[1];
            @fields = split /@/, $email;
        }
        else {
            # No usable address: the ID number doubles as the login.
            $fields[0] = $id;
            $email     = " ";
        }

        if ($cells[-1] =~ /\>(.*?)\</) {
            $section = $1;
            # An HTML entity (anything containing "&") means no section.
            if ($section =~ /&/) {
                $section = " ";
            }
        }

        $open_output->();
        print {$out_fh} "$id,$name,C, ,$section , ,$email,$fields[0]\n"
            or die "Error writing to output file: $!\n";
        $inrecord = 0;
    }
}

close(FH);
if (defined $out_fh) {
    # Buffered write errors only surface at close, so check it.
    close($out_fh) or die "Error closing output file: $!\n";
}