#!/bin/sh -u
# Validate some aspects of a properly-written shell script.
# ------------------------------------------------------------------
# Syntax:
#  $0 [ pathname ]
# ------------------------------------------------------------------
# Purpose: 
#  Validate some aspects of a single properly-written shell script.
#  Prompt for and read the script name if it is missing.
#  Perform a series of 8 tests on the script:
#   1. test the assignment label for Student Name
#   2. test the assignment label for Algonquin Email Address
#   3. test the assignment label for Student Number
#   4. test the assignment label for Course Number
#   5. test the assignment label for Lab Section Number
#   6. test the first line for a correct shell interpreter
#   7. look for a valid shell PATH= line
#   8. look for a valid umask line
#  Exit 0 if the argument pathname passed all of the tests without error.
#  Exit 1 if the argument was not a non-empty, readable, executable file.
#  Exit 2 if more than one argument was given to the script.
#  If the argument pathname had errors, exit with a value that is the
#  number of errors plus 10, e.g. exit 15 for a count of 5 errors.
# ------------------------------------------------------------------
# Student Name:                   Ian! D. Allen
# Algonquin Email Address:        alleni
# Student Number:                 000-000-000
# Course Number:                  CST 8129
# Lab Section Number:             011
# Professor Name:                 Dennis Ritchie & Brian Kernighan
# Assignment Name/Number/Date:    Assignment #2 due November 19
# Comment:                        This is a sample assignment label.
# see http://idallen.com/teaching/assignment_standards.html
# ------------------------------------------------------------------

# Set the search path for the shell to be the standard places.
# Set the umask to be non-restrictive and friendly to others.
#
PATH=/bin:/usr/bin
export PATH
umask 022

# How many leading lines of the script "head" extracts when the label
# tests search for Assignment Submission Label comment lines.
# The value must be big enough to cover the entire script header.
# 60 accommodates the largest header used in the scripts under the
# Notes section for this course (CST8129 Fall 2002).
#
headlines=60

# Validate the number of arguments; prompt for and read a missing script name.
# Exit 2 on error (meaning "syntax error calling the script").
#
if [ "$#" -eq 0 ] ; then
    # No argument given: prompt on stderr (so the prompt never mixes
    # with standard output) and read the pathname from standard input.
    # The -r option stops "read" from treating backslashes as escape
    # characters, so the pathname is stored as it was typed.
    echo 1>&2 "Enter script pathname:"
    read -r pathname
elif [ "$#" -eq 1 ] ; then
    # Exactly one argument: it is the script pathname.
    pathname="$1"
else
    # More than one argument is a usage error; show what was given.
    echo 1>&2 "$0: only 1 script name argument allowed, you entered $# ($*)"
    exit 2
fi

# Make sure the argument is a non-empty, readable, executable file.
# Exit 1 on any errors (meaning "unable to process your argument").
#
# Each guard below states the property the pathname must have; the
# first property that does not hold prints its message and exits 1.
# The order matters: file, then readable, then non-empty, then executable.
[ -f "$pathname" ] || {
    echo 1>&2 "$0: '$pathname' is not a file"
    exit 1
}
[ -r "$pathname" ] || {
    echo 1>&2 "$0: '$pathname' is not readable"
    exit 1
}
[ -s "$pathname" ] || {
    echo 1>&2 "$0: '$pathname' is empty"
    exit 1
}
[ -x "$pathname" ] || {
    echo 1>&2 "$0: '$pathname' is not executable"
    exit 1
}

# Function definitions go here ------------------------------------

# General Documentation on how these testing functions work:
#
# Defined here are a set of functions to test different parts of the script.
# All the functions take one argument: the script file name to open.
#
# +++++++++++++++++++++++++++++++++++++++++
# Assignment Label Validation Functions (5)
# +++++++++++++++++++++++++++++++++++++++++
# This first set of 5 testing functions check various fields in the
# Assignment Submission Label comments.  All these functions have
# these two parts:
#
# Part I - look for a comment line based on keywords in the line:
#   Look for a particular set of keywords in the top N lines of the file:
#     - select the first N lines from the file using "head -N"
#       (N has to be big enough to include the whole script header)
#     - run the N lines through egrep using a regular expression 
#       to select just comment lines containing the keywords
#     - use "head -1" to select just the first one of the lines found
#       (in case the egrep happens to match many lines)
#     - save the line found (if any) in a variable for use in Part II
#     - if nothing was found, print an error and return a non-zero
#       status from the function (do not exit the script)
#   The Part I regular expression is saved in a variable so that it can
#   be re-used in Part II.  The regular expression always takes this form:
#      - start at the beginning of the line
#      - allow any number of blanks around the comment char "#"
#      - allow one or more blanks between the keywords
#      - accept upper or lower case leading letters on the keywords
#      - let the trailing colon on the keywords be optional
#
# Part II - validate the arguments on the line found in Part I.
#   This uses the base regular expression from Part I and adds to it.
#   The validations vary depending on the line being validated.
#   Some lines have several validation tests applied to them.
#   Print an error message (echo the line from Part I in the message)
#   and return non-zero from the function if the validation fails.
#
# Note on code duplication: Since all Part I searches use the same basic
# structure (look for a pattern in the first N lines of the file), we should
# make the Part I code into a common helper function rather than duplicating
# the code.  I didn't do that in this version of the script.  There is
# some comment duplication; but, each function is independent
# and can be customized or cut/pasted for re-use in some other scripts.
# Rather than repeating the above (identical) documentation in each
# function, it is put here (and referred to in each function).   -IAN!

# ----------------------------------
# Test #1 - validate the Student Name comment line.
#   See the general Part I and Part II documentation, above.
#
TestStudentName () {
    # Validate the "Student Name" assignment-label comment line.
    # Arguments: $1 - pathname of the script file to examine
    # Globals:   headlines (read)  - number of leading lines to search
    #            regexp, line (written) - scratch variables
    # Outputs:   an error message on stderr when validation fails
    # Returns:   0 if the label is valid
    #            1 if no Student Name comment line was found
    #            2 if the label is not followed by at least two letters

    # Part I: find the first matching comment line in the script header.
    # "grep -E" and "head -n" are used instead of the obsolescent
    # "egrep" and "head -N" forms, which can emit warnings or be missing.
    #
    regexp='^ *# *[Ss]tudent +[Nn]ame:?'
    line=$( head -n "$headlines" "$1" | grep -E "$regexp" | head -n 1 )
    if [ -z "$line" ] ; then
        printf '%s\n' "$0: '$1': Missing Student Name comment" 1>&2
        return 1        # line not found - return bad status (not exit)
    fi

    # Part II: the label must be followed by white space (POSIX
    # character class matches blanks and tabs) and at least two letters.
    #
    if ! printf '%s\n' "$line" \
        | grep -E -q "$regexp"'[[:space:]]+[a-zA-Z][a-zA-Z]'
    then
        printf '%s\n' "$0: '$1': Missing letters after Student Name: $line" 1>&2
        return 2        # return bad status (does not exit)
    fi
    return 0  # no errors - must be a valid line - return good status
}

# ----------------------------------
# Test #2 - validate the Algonquin Email Address comment line.
#   See the general Part I and Part II documentation, above.
#
TestAlgEmail () {
    # Validate the "Algonquin Email Address" assignment-label comment line.
    # Arguments: $1 - pathname of the script file to examine
    # Globals:   headlines (read)  - number of leading lines to search
    #            regexp, line (written) - scratch variables
    # Outputs:   an error message on stderr when validation fails
    # Returns:   0 if the label is valid
    #            1 if no Algonquin Email Address comment line was found
    #            2 if the address mentions hotmail/yahoo, or contains "@"
    #              but does not end in @algonquincollege.com

    # Part I: find the first matching comment line in the script header.
    # "grep -E" and "head -n" are used instead of the obsolescent
    # "egrep" and "head -N" forms, which can emit warnings or be missing.
    #
    regexp='^ *# *[Aa]lgonquin +[Ee]-?[Mm]ail +[Aa]ddress:?'
    line=$( head -n "$headlines" "$1" | grep -E "$regexp" | head -n 1 )
    if [ -z "$line" ] ; then
        printf '%s\n' "$0: '$1': Missing Algonquin Email Address comment" 1>&2
        return 1        # line not found - return bad status (not exit)
    fi

    # Part II, check 1: reject the line if hotmail or yahoo appears
    # anywhere after the label.  -i matches any mix of upper/lower case.
    #
    if printf '%s\n' "$line" | grep -E -i -q "$regexp"'.*(hotmail|yahoo)'
    then
        printf '%s\n' \
            "$0: '$1': Email address contains hotmail or yahoo: $line" 1>&2
        return 2        # return bad status (does not exit)
    fi

    # Part II, check 2 (BONUS mark): "@" is optional in this field, but
    # if one is present the line must end with @algonquincollege.com
    # (any letter case, optional trailing blanks).
    #
    if printf '%s\n' "$line" | grep -E -q "$regexp"'.*@' ; then
        if ! printf '%s\n' "$line" \
            | grep -E -i -q "$regexp"'.*@algonquincollege\.com *$'
        then
            printf '%s %s\n' \
                "$0: '$1': Email address does not contain" \
                "@algonquincollege.com: $line" 1>&2
            return 2    # return bad status (does not exit)
        fi
    fi
    return 0  # no errors - must be a valid line - return good status
}

# ----------------------------------
# Test #3 - validate the Student Number comment line.
#   See the general Part I and Part II documentation, above.
#
TestStudNo () {
    # Validate the "Student Number" assignment-label comment line.
    # Arguments: $1 - pathname of the script file to examine
    # Globals:   headlines (read)  - number of leading lines to search
    #            regexp, line, dig3, btwn (written) - scratch variables
    # Outputs:   an error message on stderr when validation fails
    # Returns:   0 if the label is valid
    #            1 if no Student Number comment line was found
    #            2 if the number is not three groups of three digits

    # Part I: find the first matching comment line in the script header.
    # "grep -E" and "head -n" are used instead of the obsolescent
    # "egrep" and "head -N" forms, which can emit warnings or be missing.
    #
    regexp='^ *# *[Ss]tudent +[Nn]umber:?'
    line=$( head -n "$headlines" "$1" | grep -E "$regexp" | head -n 1 )
    if [ -z "$line" ] ; then
        printf '%s\n' "$0: '$1': Missing Student Number comment" 1>&2
        return 1        # line not found - return bad status (not exit)
    fi

    # Part II: after the label the line must contain exactly:
    #  - white space
    #  - 3 digits, optional dashes or blanks,
    #    3 digits, optional dashes or blanks, 3 digits
    #  - optional blanks to end of line
    # Small helper variables keep the combined expression readable.
    #
    dig3='[0-9][0-9][0-9]'      # three digits
    btwn='[- ]*'                # optional dashes or blanks go between
    if ! printf '%s\n' "$line" \
        | grep -E -q "${regexp}[[:space:]]+${dig3}${btwn}${dig3}${btwn}${dig3} *\$"
    then
        printf '%s\n' "$0: '$1': bad format Student Number in: $line" 1>&2
        return 2        # return bad status (does not exit)
    fi
    return 0  # no errors - must be a valid line - return good status
}

# ----------------------------------
# Test #4 - validate the Course Number comment line.
#   See the general Part I and Part II documentation, above.
# 
TestCourseNo () {
    # Validate the "Course Number" assignment-label comment line.
    # Arguments: $1 - pathname of the script file to examine
    # Globals:   headlines (read)  - number of leading lines to search
    #            regexp, line (written) - scratch variables
    # Outputs:   an error message on stderr when validation fails
    # Returns:   0 if the label is valid
    #            1 if no Course Number comment line was found
    #            2 if the course is not CST 8129 in an accepted format

    # Part I: find the first matching comment line in the script header.
    # "grep -E" and "head -n" are used instead of the obsolescent
    # "egrep" and "head -N" forms, which can emit warnings or be missing.
    #
    regexp='^ *# *[Cc]ourse +[Nn]umber:?'
    line=$( head -n "$headlines" "$1" | grep -E "$regexp" | head -n 1 )
    if [ -z "$line" ] ; then
        printf '%s\n' "$0: '$1': Missing Course Number comment" 1>&2
        return 1        # line not found - return bad status (not exit)
    fi

    # Part II: after the label the line must contain exactly:
    #  - white space
    #  - CST (all upper or all lower case, but not mixed)
    #  - optional blanks or dashes
    #  - 8129
    #  - optional blanks to end of line
    #
    if ! printf '%s\n' "$line" \
        | grep -E -q "$regexp"'[[:space:]]+(cst|CST)[- ]*8129 *$'
    then
        printf '%s\n' "$0: '$1': bad format Course Number in: $line" 1>&2
        return 2        # return bad status (does not exit)
    fi
    return 0  # no errors - must be a valid line - return good status
}

# ----------------------------------
# Test #5 - validate the Lab Section Number comment line.
#   See the general Part I and Part II documentation, above.
#
TestLabSect () {
    # Validate the "Lab Section Number" assignment-label comment line.
    # Arguments: $1 - pathname of the script file to examine
    # Globals:   headlines (read)  - number of leading lines to search
    #            regexp, line (written) - scratch variables
    # Outputs:   an error message on stderr when validation fails
    # Returns:   0 if the label is valid
    #            1 if no Lab Section Number comment line was found
    #            2 if the section is not 11, 12, 13 or 14 (optional
    #              leading zero)

    # Part I: find the first matching comment line in the script header.
    # "grep -E" and "head -n" are used instead of the obsolescent
    # "egrep" and "head -N" forms, which can emit warnings or be missing.
    #
    regexp='^ *# *[Ll]ab +[Ss]ection +[Nn]umber:?'
    line=$( head -n "$headlines" "$1" | grep -E "$regexp" | head -n 1 )
    if [ -z "$line" ] ; then
        printf '%s\n' "$0: '$1': Missing Lab Section Number comment" 1>&2
        return 1        # line not found - return bad status (not exit)
    fi

    # Part II: after the label the line must contain exactly:
    #  - white space
    #  - optional leading zero
    #  - digit 1 followed by 1, 2, 3, or 4
    #  - optional blanks to end of line
    #
    if ! printf '%s\n' "$line" \
        | grep -E -q "$regexp"'[[:space:]]+0?1[1234] *$'
    then
        printf '%s\n' "$0: '$1': bad format Lab Section Number in: $line" 1>&2
        return 2        # return bad status (does not exit)
    fi
    return 0  # no errors - must be a valid line - return good status
}

# ++++++++++++++++++++++++++++++
# Other Validation Functions (3)
# ++++++++++++++++++++++++++++++

# This next set of testing functions check various other fields.
# Each test is unique; see the documentation in each function.

# ----------------------------------
# Test #6 - validate the shell in the first line of the script.
#   Accept only  #!/bin/bash -u  or  #!/bin/sh -u
#
TestBashFirst () {
    # Validate the interpreter line (first line) of a script.
    # Arguments: $1 - pathname of the script file to examine
    # Globals:   line (written) - scratch variable
    # Outputs:   an error message on stderr when validation fails
    # Returns:   0 if the first line is "#!/bin/sh -u" or "#!/bin/bash -u"
    #            1 otherwise

    # Get just the first line from the script (may be empty).
    # "head -n 1" replaces the obsolete "head -1" spelling.
    #
    line=$( head -n 1 "$1" )

    # Run the line through this regular expression:
    #  - start of line
    #  - string "#!/bin/" (no blanks allowed)
    #  - optional string "ba"
    #  - string "sh"
    #  - one or more blanks
    #  - "-u" at end of line (no blanks allowed after)
    # "grep -E" replaces the obsolescent "egrep" command.
    #
    if ! printf '%s\n' "$line" | grep -E -q '^#!/bin/(ba)?sh +-u$' ; then
        printf '%s\n' "$0: '$1': incorrect first line of script: $line" 1>&2
        return 1        # return bad status (does not exit)
    fi
    return 0  # no errors - must be a valid line - return good status
}

# ----------------------------------
# Test #7 - validate the shell PATH= line.
#   Accept only  PATH=/bin:/usr/bin
#
TestPath () {
    # Validate the shell PATH= assignment line of a script.
    # Arguments: $1 - pathname of the script file to examine
    # Globals:   line, path (written) - scratch variables
    # Outputs:   an error message on stderr when validation fails
    # Returns:   0 if the first PATH assignment is PATH=/bin:/usr/bin
    #            1 if no PATH assignment line was found
    #            2 if the first PATH assignment is anything else

    # Part I: look for the first occurrence of PATH= that isn't in a
    # shell comment line ([^#]*).  (We don't care about PATH in comments.)
    # Catch people who leave blanks around their "=" too.
    # PATH must start the line or follow a character that cannot be part
    # of a variable name, so MANPATH=, CLASSPATH=, LD_LIBRARY_PATH= and
    # similar assignments are not mistaken for the PATH line.
    #
    line=$( grep -E '^([^#]*[^#A-Za-z0-9_])?PATH *=' "$1" | head -n 1 )
    if [ -z "$line" ] ; then
        printf '%s\n' "$0: '$1': missing PATH= line" 1>&2
        return 1        # line not found - return bad status (not exit)
    fi

    # Part II: run the line through this regular expression:
    #  - beginning of line
    #  - optional white space
    #  - optional: word "export" followed by one or more blanks
    #  - the exact string: PATH=/bin:/usr/bin
    #  - zero or more blanks
    #  - a comment character "#", semicolon ";", or end-of-line
    # (Note that (#|;|$) is not the same as [#;$]!)
    #
    path='PATH=/bin:/usr/bin'
    if ! printf '%s\n' "$line" | grep -E -q \
        '^[[:space:]]*(export[[:space:]]+)?'"$path"'[[:space:]]*(#|;|$)'
    then
        printf '%s\n' "$0: '$1': unrecognized PATH= line: $line" 1>&2
        return 2        # return bad status (does not exit)
    fi
    return 0  # no errors - must be a valid line - return good status
}

# ----------------------------------
# Test #8 - validate the first umask line found anywhere in the script.
#   Accept only  umask  followed by a 0 and then 2-3 more digits
#
TestUmask () {
    # Validate the first umask line found anywhere in a script.
    # Arguments: $1 - pathname of the script file to examine
    # Globals:   line, dig23 (written) - scratch variables
    # Outputs:   an error message on stderr when validation fails
    # Returns:   0 if the first umask line is "umask 0NN" or "umask 0NNN"
    #            1 if no umask (or misspelled "unmask") line was found
    #            2 if the first such line has any other form

    # Part I: look for the first occurrence of umask at the start of a
    # line (ignore leading whitespace).  Also catch a common spelling
    # error ("unmask").  "head -n 1" keeps only the first match so that a
    # later valid umask line cannot hide an earlier invalid one.
    #
    line=$( grep -E '^[[:space:]]*un?mask[[:space:]]' "$1" | head -n 1 )
    if [ -z "$line" ] ; then
        printf '%s\n' "$0: '$1': missing umask line" 1>&2
        return 1        # line not found - return bad status (not exit)
    fi

    # Part II: run the line through this regular expression:
    #  - beginning of line
    #  - optional white space
    #  - the exact word: umask
    #  - one or more white space chars
    #  - a digit zero
    #  - two or three (not four) more digits from the set 0-7
    #  - optional white space
    #  - a comment character "#", semicolon ";", or end-of-line
    #
    dig23='[0-7][0-7][0-7]?' # or [0-7]{2,3}
    if ! printf '%s\n' "$line" | grep -E -q \
        '^[[:space:]]*umask[[:space:]]+0'"$dig23"'[[:space:]]*(#|;|$)'
    then
        printf '%s\n' "$0: '$1': unrecognized umask line: $line" 1>&2
        return 2        # return bad status (does not exit)
    fi
    return 0  # no errors - must be a valid line - return good status
}

# End of function definitions ---------------------------------------

# Main program starts here.

# Call each of the 8 validation functions, passing in the name of
# the script file to open and validate.  Add up the number of
# errors found.
#
# Run every validation function against the script and count how many
# of them report a failure (any non-zero return status counts as one
# error).  POSIX arithmetic expansion $(( )) is used for the counting
# because "let" is not a POSIX sh builtin: under a /bin/sh that lacks
# it, the count would never increase and the script would exit 0.
#
errcount=0
for check in TestStudentName TestAlgEmail TestStudNo TestCourseNo \
    TestLabSect TestBashFirst TestPath TestUmask
do
    if ! "$check" "$pathname" ; then
        errcount=$(( errcount + 1 ))
    fi
done

# Done all 8 tests.  Now set the exit code:
#    Exit 0 if the argument pathname passed all of the tests without error.
#    If the argument pathname had errors, exit with a value that is the
#    number of errors plus 10, e.g. exit 15 for a count of 5 errors.
#
exitstatus=0
if [ "$errcount" -gt 0 ] ; then
    exitstatus=$(( errcount + 10 ))
    # echo "DEBUG errcount is $errcount exitstatus is $exitstatus"
fi
exit "$exitstatus"