#!/bin/sh -u
# A CGI weather script to show the current temperature in select cities.
#------------------------------------------------------------------
# Shell script syntax: $0
# CGI script syntax: http://..../$0?
#------------------------------------------------------------------
# Purpose: NET2003 Data Mining using Shell Scripts
# Display an image.
# Display some contact information.
# Display the current weather for a given city.
# - use shell commands to find the weather URL for a city in an index page
# - go to that URL and extract the temperature for that given city
# HTML reference: http://www.w3schools.com/tags/default.asp
# CSS Styles Reference: http://www.w3schools.com/css/css_reference.asp
#
# Comments starting with #I! are instructor comments to students.
# They would not appear in a normal shell script.
#
#I! Always double-quote all your variables when you use them!
#
# This script has a bug in it. It can't find "Montreal" (no accents).
#------------------------------------------------------------------
# License: GNU Public License
# -Ian! D. Allen - idallen@idallen.ca - www.idallen.com
#############################################################################
# CGI script set-up.
# This will be an HTML document. Set the content type and charset.
# If input is not a terminal, assume we are a CGI script and generate the
# two HTTP header lines needed. Redirect stderr onto stdout (into browser):
#
# MIME type and character set announced in the HTTP header.
content='text/html; charset=iso-8859-1'

# When standard input is not a terminal we assume a web server started us:
# write the Content-Type header followed by the empty separator line, then
# point stderr at stdout so that error messages reach the browser too.
if ! [ -t 0 ] ; then
    printf 'Content-Type: %s\n\n' "$content"
    exec 2>&1
fi
#############################################################################
# Shell script set-up.
# Use standard search path, friendly umask, ASCII collating and sorting.
# Set the language and character set to be ASCII/C standard.
#
# Search only the two standard system directories for commands.
PATH=/bin:/usr/bin
# Plain "C" collation and locale for the commands run below.
LC_COLLATE=C
LANG=C
export PATH LC_COLLATE LANG
# File-creation mask: clear group and other write permission.
umask 022
#############################################################################
# HTML document set-up.
# Output the content type in the HTML header as well as above.
# Wrap long lines at 40em width. Set a background colour.
#I! Note the use of single quotes inside double.
#
# Opening of the document: two empty lines, the page title text, and
# three more empty lines.
printf '%s\n' '' '' 'Ian!s Face and Weather Page' '' '' ''

# The visible page heading, followed by an empty line.
printf '%s\n' 'Ian!s Face and Weather Page' ''

# NOTE(review): the comments that accompanied this statement describe an
# image floated to the right with left padding; the statement as it stands
# writes only an empty line - confirm whether markup has been lost here.
#I! The argument below is single-quoted.
printf '%s\n' ''

# Contact information heading.
printf '%s\n' 'Contact Information' ''

# E-mail address paragraph.
printf '%s\n' '' 'You can contact me at:' '' 'idallen@idallen.ca' '' '' ''

# Home page paragraph.
printf '%s\n' '' 'My home page is here:' '' 'idallen.com' '' '' ''
#############################################################################
# Data Mining Section.
# To look up a city, we need an index page that pairs cities with
# URLs that have the weather for those cities. I located that web page
# and played around until I had some commands that would extract the
# URL for any given city. I then go to that URL and get the weather.
# Get an index page with a list of cities for which we can fetch the weather.
# Redirect wget standard error into the variable, too, to catch errors.
# Fetch the page in raw HTML format so that we can see the URLs used.
#I! You can use "wget -O" to fetch an unformatted HTML web page.
#I! Command substitution $(...) captures the standard output of wget.
#I! Redirection 2>&1 means we also capture standard error output.
#
# Index page that lists the cities for which weather can be fetched.
indexurl='http://text.weatheroffice.gc.ca/canada_e.html'

# Fetch the raw page to standard output (-O -) with terse logging (-nv).
# wget's standard error is captured as well, so that an error message ends
# up in the variable where the check that follows can display it.
citypage=$(
    wget -nv -O - "$indexurl" 2>&1
)
# Error checking the index page:
# See if the index page contains Ottawa. If not, must be an error.
#I! The IF statement tests the return status from grep (the last command).
#I! Note the use of leading ! to invert the return status of the command line.
#I! The -q option to grep suppresses the output of grep.
#I! All I care about is the return status, not the output.
#
if ! echo "$citypage" | grep -q "Ottawa" ; then
    # The fetched text does not mention Ottawa, so treat it as a failed
    # download and say which URL was tried.
    printf '%s\n' "" "I cannot fetch the index page from: "
    echo "$indexurl"
    printf '\n\n'

    # Show whatever wget gave us (hopefully including its error message),
    # with angle brackets turned into square brackets so that a browser
    # displays the text instead of interpreting it.
    #I! The "tr" translate command is very useful in data mining.
    printf '\n'
    echo "$citypage" | tr '<>' '[]'
    printf '\n\n'

    # Write the two closing (empty) lines and stop with a failure status.
    printf '\n\n'
    exit 1
fi
# Create a list of cities from the index page:
# Here's the tricky part. I had to work on this at the command line
# until I got the pipeline of commands just right, then I copied the
# pipeline of commands into the script here and formatted it nicely.
# Here's how to data-mine the index page to get a list of cities:
#
# Change all occurrences of the two characters <' into newlines.
# Look for lines containing the /forecast/city URL at the start and
# extract that line and the line that follows it (the city name).
# Every one of those /forecast/city URLs is followed by a city name; so,
# from the pairs, pick off only the lines (city names) that start with >.
# Remove the > from the lines, leaving just the city names.
#I! The "tr" translate command is very useful for data mining.
#I! The -A (after) and -B (before) options to grep are also very useful.
#
# extract_city_names: read the raw index page on standard input and write
# one city name per line on standard output.
#   1. turn every < and every ' into a newline;
#   2. keep each line that starts with /forecast/city plus the one after it;
#   3. of those, keep only the lines that start with > (the city names);
#   4. delete the > characters.
#I! The "tr" translate command is very useful for data mining.
#I! The -A (after) and -B (before) options to grep are also very useful.
extract_city_names () {
    tr "<'" '\n' |
        grep -A 1 '^/forecast/city' |
        grep '^>' |
        tr -d '>'
}

citieslist=$( echo "$citypage" | extract_city_names )
# Handle the user-supplied city name argument:
# The user can pass in an optional argument to this script.
# If we have a city argument to the script, use it, otherwise give a list.
# We pick out the environment variables SERVER_NAME and SCRIPT_NAME
# if we are a CGI script; otherwise (not CGI), we give a usage message.
#I! $# is the count of the number of arguments to the shell script
#I! $0 is the Unix pathname (name) of the shell script.
#I! $1 is the first argument to the shell script.
#I! In a CGI script, the argument follows the URL, separated by '?'.
#
# Choose the city to display.
#   - No argument: print the list of known cities plus a usage hint, and
#     use Ottawa.
#   - One argument: use it if a line of $citieslist starts with it as a
#     whole word; otherwise report the problem, print the list, and use
#     Ottawa.
# Reads:  $#, $0, $1, citieslist, indexurl, SERVER_NAME, SCRIPT_NAME
# Writes: city, link
#
# SERVER_NAME and SCRIPT_NAME are only present when a web server runs the
# script.  Because the script runs under "sh -u", they are expanded as
# ${VAR-} (empty when unset) so that a terminal run does not abort with an
# "unset variable" error.
#
if [ "$#" -eq 0 ]; then
    # No arguments; the user didn't give us a city name.  Show the list.
    echo "I can get weather for these cities:
"
    # Write an empty string with no trailing newline.  printf is used in
    # place of "echo -n", whose -n handling differs between sh versions.
    printf '%s' ""
    # NOTE(review): link is assigned here, but the sed expression as it
    # stands does not reference it - it replaces the leading run of
    # non-parenthesis characters on each line with itself.
    link="http://${SERVER_NAME-}${SCRIPT_NAME-}?"
    echo "$citieslist" | sed -e \
        "s!^[^()]*!&!"
    echo "
"
    # Since no argument was given: default to using Ottawa as the city.
    # Give different usage messages for a CGI script vs. a command-line
    # shell script run at a terminal.
    city='Ottawa'
    echo ""
    if [ -t 0 ] ; then
        # This message is for a terminal.
        echo "Just give the city name as an argument."
        echo "
"
        echo "For example: "
        echo "$0 $city"
    else
        # This message is for a non-terminal (must be a CGI script).
        echo "Just append the city name to the URL"
        echo "after a question mark."
        echo "
"
        echo "For example: "
        echo ""
        echo "http://${SERVER_NAME-}${SCRIPT_NAME-}?$city"
        echo ""
    fi
    echo "
"
else
    # The user gave an argument to the script; use it as the city name.
    # The value comes straight from the URL and is echoed into the page,
    # so neutralise angle brackets first, in the same way the script
    # already does when it dumps fetched pages.
    city=$( printf '%s' "$1" | tr '<>' '[]' )
    # Make sure the given argument is one of the cities we recognize.
    #I! -e protects a pattern from being mistaken for a grep option.
    if ! echo "$citieslist" | grep -q -w -e "^$city" ; then
        # Cannot find that city in the cities list.  Error:
        echo ""
        # printf '%s' prints the name verbatim (no echo escape handling).
        printf "I cannot find '%s'\n\n" "$city"
        echo "I looked in "
        echo "$indexurl"
        echo "
"
        echo "I can get weather for these cities:
"
        echo ""
        link="http://${SERVER_NAME-}${SCRIPT_NAME-}?"
        echo "$citieslist" | sed -e \
            "s!^[^()]*!&!"
        echo "
"
        # Default to using Ottawa as the city.
        city='Ottawa'
    fi
fi
# Announce the chosen city; printf '%s' prints the name verbatim.
printf "I will display '%s'\n\n" "$city"

# Search for $city in the index page and find its city URL:
# Split the index page on quote characters, so that each URL sits on a
# line of its own.  The URL we want is the line *before* the first line
# that contains the city name as a whole word.
#I! The -B option also gets one line *before* the line matching the pattern
#I! -e keeps the pattern from being read as a grep option.
#
cityurl=$( echo "$citypage" | tr "'" '\n' \
    | grep -B 1 -w -e "$city" | head -n 1 )

# Data checking: make sure we found a properly formatted city URL.
# If not, report it, print the list of cities, and substitute the Ottawa
# city URL.
#
if ! echo "$cityurl" | grep -q '^/forecast/city' ; then
    # We didn't get the right URL out of the index page for some reason.
    echo ""
    printf "I cannot find '%s'\n\n" "$city"
    echo "I looked in "
    echo "$indexurl"
    echo "
"
    echo "I can get weather for these cities:
"
    echo ""
    # SERVER_NAME and SCRIPT_NAME are unset outside a web server and the
    # script runs under "sh -u"; ${VAR-} expands to empty when unset, so
    # we still reach the fallback below instead of aborting here.
    # NOTE(review): the sed expression as it stands does not reference
    # link; it replaces the leading non-parenthesis run with itself.
    link="http://${SERVER_NAME-}${SCRIPT_NAME-}?"
    echo "$citieslist" | sed -e \
        "s!^[^()]*!&!"
    echo "
"
    # Default to using the Ottawa URL as the city URL.
    cityurl='/forecast/city_e.html?on-118'
fi
# Fetch the whole formatted text version of the city URL into a variable.
# The formatted page is easier to parse than the raw URL.
#I! Note the use of double quotes to allow variable $cityurl to expand
#
# Build the absolute URL of the city's weather page.  The value kept in
# cityurl starts with a slash (it is checked against ^/forecast/city, or
# set to the Ottawa default); strip one leading slash before joining so
# that the path does not begin with "//".
url="http://text.weatheroffice.gc.ca/${cityurl#/}"

# Fetch the page as formatted text (lynx -dump) using the same character
# set that the script announces for its own output.
page=$( lynx -display-charset=iso-8859-1 -dump "$url" )
# Check for the string "Temperature:" in the returned web page.
# Print the error message in red if we can't find it.
#
# Pull the observation line and the temperature out of the fetched page.
# Reads:  page, city, url
# Writes: obsline, templine
#
if ! printf '%s\n' "$page" | grep -q 'Temperature:' ; then
    # We didn't find any Temperature for some reason.  Wrong page?
    echo ""
    printf " Error Fetching Weather Page for '%s' via '%s'\n" "$city" "$url"
    echo "
"
    # Output what we found.  Angle brackets are turned into square
    # brackets so that a browser shows the text instead of interpreting
    # it; parentheses are translated as well, as they were before.
    echo ""
    printf '%s\n' "$page" | tr '<>()' '[][]'
    echo "
"
    # Explain that the temperature is not available.
    #I! Double quotes are needed here so that $city is expanded.
    obsline="for $city"
    templine='not available'
else
    # We found "Temperature:" - it must be a valid weather page.
    # obsline:  the first line containing "Observed ".
    # templine: the line following the first "Temperature:" line
    #           (grep -A 1 prints the match plus one line after it;
    #           head/tail then pick the second line of that output).
    #
    obsline=$( printf '%s\n' "$page" | grep 'Observed ' | head -n 1 )
    templine=$( printf '%s\n' "$page" | grep -A 1 -e 'Temperature:' \
        | head -n 2 | tail -n 1 )
fi
#############################################################################
# Output the found data with italic and bold emphasis:
#I! This could all be one single echo statement; but, I liked this better:
#
# Report: an empty line, the lead-in text, the observation line, the
# word "is", the temperature line, and two closing empty lines.
# The two variable lines stay as individual echo statements.
printf '\n%s\n' "The temperature"
echo "$obsline"
printf '%s\n' "is"
echo "$templine"
printf '\n\n'
#############################################################################
# HTML document closing. Script exit.
#
echo "