#!/bin/sh
# A CGI weather script to show the current temperature in select cities.
#------------------------------------------------------------------
# Shell script syntax: $0 [city]
#   CGI script syntax: http://..../$0?city
#------------------------------------------------------------------
# Purpose: NET2003 Data Mining using Shell Scripts
#   Display an image.
#   Display some contact information.
#   Display the current weather for a given city.
#   - use shell commands to find the weather URL for a city in an index page
#   - go to that URL and extract the temperature for that given city
# HTML reference: http://www.w3schools.com/tags/default.asp
# CSS Styles Reference: http://www.w3schools.com/css/css_reference.asp
#
# Comments starting with #I! are instructor comments to students.
# They would not appear in a normal shell script.
#
#I! Always double-quote all your variables when you use them!
#
# This script has a bug in it.  It can't find "Montreal" (no accents).
#
# NOTE(review): in this copy many echo strings are empty or contain only
# blank lines, and the sed substitution applied to the cities list replaces
# each match with itself, while the comments describe HTML markup (page
# header, image, anchors, red error text, italic/bold emphasis).  The markup
# looks to have been lost before this copy was made.  The strings below are
# kept exactly as found here; restore them from the canonical script.
#------------------------------------------------------------------
# License: GNU Public License
# -Ian! D. Allen - idallen@idallen.ca - www.idallen.com

# Treat the use of an unset variable as an error.  The option is set here
# rather than on the #! line so it also applies when run as "sh script".
set -u

#############################################################################
# CGI script set-up.
# This will be an HTML document.  Set the content type and charset.
# If input is not a terminal, assume we are a CGI script and generate the
# two HTTP header lines needed.  Redirect stderr onto stdout (into browser):
#
content='text/html; charset=iso-8859-1'
if [ ! -t 0 ] ; then
    echo "Content-Type: $content"
    echo ""
    exec 2>&1
fi

#############################################################################
# Shell script set-up.
# Use standard search path, friendly umask, ASCII collating and sorting.
# Set the language and character set to be ASCII/C standard.
#
PATH=/bin:/usr/bin ; export PATH
LC_COLLATE=C ; export LC_COLLATE
LANG=C ; export LANG
umask 022

#############################################################################
# Helper functions.
#I! printf '%s\n' is used instead of echo whenever the text comes from a
#I! variable: some /bin/sh echo built-ins interpret backslash sequences
#I! and options such as -n, which would corrupt fetched web pages.

# html_escape TEXT
# Write TEXT to stdout with the characters & < > replaced by HTML entities,
# so that user-supplied text cannot inject markup into the page.
html_escape () {
    printf '%s\n' "$1" \
        | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# print_city_links
# Write the cities list (global $citieslist), one city per line.
# Run the cities list through the stream editor "sed" to surround
# each city name with a self-referential HTML anchor hyperlink.
# Avoid putting parentheses in the link (doesn't work).
# ${VAR-} expands to nothing when VAR is unset, so "set -u" does not
# abort the script when it is run outside a CGI environment.
# NOTE(review): as found in this copy the substitution is an identity and
# $link is never used; presumably the anchor markup built from $link
# belongs in the replacement text - confirm against the canonical script.
print_city_links () {
    link="http://${SERVER_NAME-}${SCRIPT_NAME-}?"
    printf '%s\n' "$citieslist" | sed -e \
        "s!^[^()]*!&!"
}

# report_unknown_city CITY
# Tell the user that CITY could not be found in the index page (global
# $indexurl) and list the cities we do know about.  CITY is HTML-escaped
# before it is printed.  The blank-line output reproduces what this copy
# of the script printed.
report_unknown_city () {
    printf '\n\n\n'
    printf "I cannot find '%s'\n\n\n" "$( html_escape "$1" )"
    printf '\n\nI looked in \n'
    printf '%s\n' "$indexurl"
    printf '\n\n\n'
    printf '\n\nI can get weather for these cities:\n\n\n'
    printf '\n\n\n'
    print_city_links
    printf '\n\n\n'
}

#############################################################################
# HTML document set-up.
# Output the content type in the HTML header as well as above.
# Wrap long lines at 40em width.  Set a background colour.
#I! Note the use of single quotes inside double.
#
echo ""
echo ""
echo "Ian!s Face and Weather Page"
echo ""
echo ""
echo ""
printf '\n\nIan!s Face and Weather Page\n\n\n'

# Let the image float to the right of the following text.
# Pad the image on the left (but not top, right, or bottom).
#I! Note the use of double quotes inside single.
#
echo ''

# Give some contact information.
#
printf '\n\nContact Information\n\n\n'
printf '\n\n\n'
echo "You can contact me at:"
echo ""
echo "idallen@idallen.ca"
echo ""
printf '\n\n\n'
printf '\n\n\n'
echo "My home page is here:"
echo ""
echo "idallen.com"
echo ""
printf '\n\n\n'

#############################################################################
# Data Mining Section.
# To look up a city, we need an index page that pairs cities with
# URLs that have the weather for those cities.  I located that web page
# and played around until I had some commands that would extract the
# URL for any given city.  I then go to that URL and get the weather.

# Get an index page with a list of cities for which we can fetch the weather.
# Redirect wget standard error into the variable, too, to catch errors.
# Fetch the page in raw HTML format so that we can see the URLs used.
#I! You can use "wget -O" to fetch an unformatted HTML web page.
#I! Command substitution $(...) captures the standard output of wget.
#I! Redirection 2>&1 means we also capture standard error output.
#
indexurl='http://text.weatheroffice.gc.ca/canada_e.html'
citypage=$( wget 2>&1 -nv -O - "$indexurl" )

# Error checking the index page:
# See if the index page contains Ottawa.  If not, must be an error.
#I! The IF statement tests the return status from grep (the last command).
#I! Note the use of leading ! to invert the return status of the command line.
#I! The -q option to grep suppresses the output of grep.
#I! All I care about is the return status, not the output.
#
if ! printf '%s\n' "$citypage" | grep -q "Ottawa" ; then
    printf '\n\n\n'
    echo "I cannot fetch the index page from: "
    printf '%s\n' "$indexurl"
    printf '\n\n\n'
    # We hope the error message is listed in the wget output somewhere.
    # Output what we found with the HTML escaped to square brackets
    #I! The "tr" translate command is very useful in data mining.
    printf '\n\n'
    printf '%s\n' "$citypage" | tr '<>' '[]'
    printf '\n\n'
    # Close the web page and exit the script.
    echo ""
    echo ""
    exit 1
fi

# Create a list of cities from the index page:
# Here's the tricky part.  I had to work on this at the command line
# until I got the pipeline of commands just right, then I copied the
# pipeline of commands into the script here and formatted it nicely.
# Here's how to data-mine the index page to get a list of cities:
#
# Change all occurrences of the two characters <' into newlines.
# Look for lines containing the /forecast/city URL at the start and
# extract that line and the line that follows it (the city name).
# Every one of those /forecast/city URLs is followed by a city name; so,
# from the pairs, pick off only the lines (city names) that start with >.
# Remove the > from the lines, leaving just the city names.
#I! The "tr" translate command is very useful for data mining.
#I! The -A (after) and -B (before) options to grep are also very useful.
#I! Both "tr" sets are the same length: not every "tr" pads a short
#I! second set the way GNU tr does.
#
citieslist=$(
    printf '%s\n' "$citypage" \
        | tr "<'" '\n\n' \
        | grep -A 1 '^/forecast/city' \
        | grep '^>' \
        | tr -d '>'
)

# Handle the user-supplied city name argument:
# The user can pass in an optional argument to this script.
# If we have a city argument to the script, use it, otherwise give a list.
# We pick out the environment variables SERVER_NAME and SCRIPT_NAME
# if we are a CGI script; otherwise (not CGI), we give a usage message.
#I! $# is the count of the number of arguments to the shell script
#I! $0 is the Unix pathname (name) of the shell script.
#I! $1 is the first argument to the shell script.
#I! In a CGI script, the argument follows the URL, separated by '?'.
#
if [ "$#" -eq 0 ]; then
    # No arguments; the user didn't give us a city name.  Show a list
    # of cities, padded on the left to indent the list a bit.
    # printf without a trailing newline replaces the non-portable "echo -n".
    #
    printf '\n\nI can get weather for these cities:\n\n\n'
    printf '\n'
    print_city_links
    printf '\n\n'

    # Since no argument was given: Default to using Ottawa as the city.
    # Give different usage messages for a CGI script vs.
    # a command-line shell script run at a terminal.
    #
    city='Ottawa'
    printf '\n\n\n'
    if [ -t 0 ] ; then
        # This message is for a terminal.
        echo "Just give the city name as an argument."
        printf '\n\n'
        echo "For example: "
        printf '%s %s\n' "$0" "$city"
    else
        # This message is for a non-terminal (must be a CGI script).
        echo "Just append the city name to the URL"
        echo "after a question mark."
        printf '\n\n'
        echo "For example: "
        echo ""
        printf '%s\n' "http://${SERVER_NAME-}${SCRIPT_NAME-}?$city"
        echo ""
    fi
    printf '\n\n\n'
else
    # The user gave an argument to the script.
    # We will use the given argument as the city name to look up.
    # Make sure the given argument is one of the cities we recognize.
    # If it isn't recognized, print a list and default to Ottawa.
    # NOTE(review): the argument is used as a grep regular expression
    # anchored at the start of a city line, so it matches as a prefix.
    #
    city=$1
    if ! printf '%s\n' "$citieslist" | grep -q -w -e "^$city" ; then
        # Cannot find that city in the cities list.  Error:
        report_unknown_city "$city"
        # Default to using Ottawa as the city.
        city='Ottawa'
    fi
fi

# The city name may have come from the user: escape it before it is
# written into the page.
city_html=$( html_escape "$city" )
printf "\n\nI will display '%s'\n\n\n" "$city_html"

# Search for the $city in the index page and find its city URL:
# Split the index page on quote characters, to put the URL on its own line.
# The city URL we want is the line *before* the city in the web page:
#I! The -B option also gets one line *before* the line matching the pattern
#
cityurl=$(
    printf '%s\n' "$citypage" \
        | tr "'" '\n' \
        | grep -B 1 -w -e "$city" \
        | head -n 1
)

# Data checking: Make sure we found a proper format city URL.
# Substitute the Ottawa city URL if we didn't find anything.
#
if ! printf '%s\n' "$cityurl" | grep -q '^/forecast/city' ; then
    # We didn't get the right URL out of the index page for some reason.
    report_unknown_city "$city"
    # Default to using the Ottawa URL as the city URL.
    cityurl='/forecast/city_e.html?on-118'
fi

# Fetch the whole formatted text version of the city URL into a variable.
# The formatted page is easier to parse than the raw URL.
#I! Note the use of double quotes to allow variable $cityurl to expand
# $cityurl always begins with a slash here (checked just above), so no
# extra slash is added after the host name.
#
url="http://text.weatheroffice.gc.ca$cityurl"
page=$( lynx -display-charset=iso-8859-1 -dump "$url" )

# Check for the string "Temperature:" in the returned web page.
# Print the error message in red if we can't find it.
#
if ! printf '%s\n' "$page" | grep -q 'Temperature:' ; then
    # We didn't find any Temperature for some reason.  Wrong page?
    printf '\n\n\n'
    printf " Error Fetching Weather Page for '%s' via '%s'\n" \
        "$city_html" "$url"
    printf '\n\n\n'
    # Output what we found, with angle brackets and parentheses both
    # turned into square brackets so the text cannot act as markup.
    printf '\n\n'
    printf '%s\n' "$page" | tr '<>()' '[][]'
    printf '\n\n\n'
    # Explain that the temperature is not available
    #I! Double quotes are needed here so that the city name is expanded.
    obsline="for $city_html"
    templine='not available'
else
    # We found "Temperature:" - it must be a valid weather page.
    # Use grep to extract the line containing "Observed".
    # Use grep to extract the line following "Temperature".
    #
    obsline=$( printf '%s\n' "$page" | grep 'Observed ' | head -n 1 )
    templine=$(
        printf '%s\n' "$page" \
            | grep -A 1 -e 'Temperature:' \
            | head -n 2 \
            | tail -n 1
    )
fi

#############################################################################
# Output the found data with italic and bold emphasis:
#I! This could all be one single printf statement; but, I liked this better:
#
printf '\n\n\n'
echo "The temperature"
printf '%s\n' "$obsline"
echo "is"
printf '%s\n' "$templine"
printf '\n\n\n'

#############################################################################
# HTML document closing.  Script exit.
#
echo ""
echo ""
exit 0