Sample 1: Download the files linked from an HTML source
#!/bin/bash
#
# Download every file that a saved HTML listing links to.
#
# Usage: <script> HTML_FILE
#
# Each line of HTML_FILE is searched for an href="NAME.EXT" attribute, where
# EXT is one to four letters. Every NAME.EXT found is printed to stdout and
# then fetched from BASE_URL into DEST_DIR (created if it does not exist).

readonly BASE_URL='http://jcifs.samba.org/src/examples'
readonly DEST_DIR='src'

# href="<target>" where <target> ends in a literal dot plus 1-4 letters.
# [^"]* keeps the capture inside a single attribute value, and [[:alpha:]]
# is the POSIX letter class (a bare [:alpha:] is just the set ":", "a", "l",
# "p", "h" and would reject extensions such as ".txt").
readonly HREF_RE='href="([^"]*\.[[:alpha:]]{1,4})"'

#######################################
# Print the href target of every stdin line that links to a file.
# Globals:   HREF_RE (read)
# Arguments: none (reads lines from stdin)
# Outputs:   one matched target per line on stdout
#######################################
extract_href_targets() {
  local line
  # The "|| [[ -n ... ]]" part keeps a last line that has no trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" =~ $HREF_RE ]]; then
      printf '%s\n' "${BASH_REMATCH[1]}"
    fi
  done
}

#######################################
# Entry point: list and download the targets found in the file named by $1.
# Globals:   BASE_URL, DEST_DIR (read)
# Arguments: $1 - path to the HTML file
# Returns:   0 on success, 1 if the input is unreadable or a download
#            failed, 2 on missing argument
#######################################
main() {
  local input_file=${1-}
  local target
  local status=0

  if [[ -z "$input_file" ]]; then
    printf 'Usage: %s HTML_FILE\n' "${0##*/}" >&2
    return 2
  fi
  if [[ ! -r "$input_file" ]]; then
    printf 'error: cannot read %s\n' "$input_file" >&2
    return 1
  fi
  mkdir -p -- "$DEST_DIR" || return 1

  while IFS= read -r target; do
    printf '%s\n' "$target"
    # Subshell: the cd must not leak into later iterations, and wget only
    # runs when the cd actually succeeded.
    if ! (cd -- "$DEST_DIR" && wget "${BASE_URL}/${target}"); then
      printf 'warning: download failed: %s\n' "$target" >&2
      status=1
    fi
  done < <(extract_href_targets < "$input_file")

  return "$status"
}

main "$@"
======================================
Sample data file (input for Sample 1)
<tr><td valign="top"><img src="/icons/unknown.gif" alt="[ ]"></td><td><a href="UrlReader.java">UrlReader.java</a></td><td align="right">18-Oct-2011 15:26 </td><td align="right">1.0K</td></tr>
<tr><td valign="top"><img src="/icons/unknown.gif" alt="[ ]"></td><td><a href="VerifyGuest.java">VerifyGuest.java</a></td><td align="right">18-Oct-2011 15:26 </td><td align="right">896 </td></tr>
<tr><td valign="top"><img src="/icons/unknown.gif" alt="[ ]"></td><td><a href="VerifyIO.java">VerifyIO.java</a></td><td align="right">18-Oct-2011 15:26 </td><td align="right">2.1K</td></tr>
<tr><td valign="top"><img src="/icons/unknown.gif" alt="[ ]"></td><td><a href="VerifyReads.java">VerifyReads.java</a></td><td align="right">18-Oct-2011 15:26 </td><td align="right">2.1K</td></tr>
<tr><td valign="top"><img src="/icons/unknown.gif" alt="[ ]"></td><td><a href="WaitNamedPipe.java">WaitNamedPipe.java</a></td><td align="right">18-Oct-2011 15:26 </td><td align="right">1.1K</td></tr>
Sample 2: Cut information from html source
#!/bin/bash
#
# Print "<section number> <title>" for every numbered heading in an HTML file.
#
# Usage: <script> HTML_FILE
#
# Example input line:
#   <h4 class="sect3"><span class="secnum">1.13.1.2</span> Volume Shadow Copy Service (VSS) Writer</h4>
# Example output line:
#   1.13.1.2 Volume Shadow Copy Service (VSS) Writer
#
# set -x   # uncomment to trace execution

# Group 1: the text of the secnum span, which must end in a digit (so "1",
#          "1.13.1.2" and "A.1" all qualify).
# Group 2: the heading title between the span and the closing </hN> tag.
readonly HEADING_RE='secnum">([^<]*[0-9])</span>[[:space:]]+(.*)</h[1-6]>'

#######################################
# Print "number title" for each numbered heading read from stdin.
# Globals:   HEADING_RE (read)
# Arguments: none (reads lines from stdin)
# Outputs:   one "number title" line per matching input line on stdout
#######################################
extract_headings() {
  local line
  # The "|| [[ -n ... ]]" part keeps a last line that has no trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" =~ $HEADING_RE ]]; then
      # Quoted printf: titles containing "*" or "?" must not be glob-expanded.
      printf '%s %s\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}"
    fi
  done
}

#######################################
# Entry point: extract the headings from the file named by $1.
# Arguments: $1 - path to the HTML file
# Returns:   0 on success, 1 if the input is unreadable, 2 on missing argument
#######################################
main() {
  local input_file=${1-}

  if [[ -z "$input_file" ]]; then
    printf 'Usage: %s HTML_FILE\n' "${0##*/}" >&2
    return 2
  fi
  if [[ ! -r "$input_file" ]]; then
    printf 'error: cannot read %s\n' "$input_file" >&2
    return 1
  fi

  extract_headings < "$input_file"
}

main "$@"
Sample 3: Grab information from DDL scripts
#!/bin/bash
# set -x   # uncomment to trace execution
#
# Print "<name> <type> <size> <comment>" for every column definition in a
# DDL script.
#
# Usage: <script> DDL_FILE      (DDL_FILE is looked up inside ./sql/)
#
# Example input line:
#   `Place_ID` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'Event ID',
# Example output line:
#   Place_ID bigint 20 Event ID

readonly DDL_DIR='sql'

# The backtick and single quote are kept in variables so the pattern can be
# written without nested shell quoting.
readonly BT='`'
readonly SQ="'"

# Group 1: column name between backticks.
# Group 2: type name (text up to the opening parenthesis).
# Group 3: the number that follows the opening parenthesis.
# Group 4: the last single-quoted string on the line (the COMMENT text in
#          the example above).
# [[:space:]] is used for the separator because "\t" is not a tab inside a
# POSIX bracket expression.
readonly COLUMN_RE="${BT}([^${BT}]+)${BT}[[:space:]]+([^([:space:]][^(]*)\(([0-9]+).*${SQ}([^${SQ}]*)${SQ}"

#######################################
# Print "name type size comment" for each column definition read from stdin.
# Globals:   COLUMN_RE (read)
# Arguments: none (reads lines from stdin)
# Outputs:   one line per matching input line on stdout
#######################################
extract_columns() {
  local line
  # The "|| [[ -n ... ]]" part keeps a last line that has no trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" =~ $COLUMN_RE ]]; then
      printf '%s %s %s %s\n' \
        "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}" \
        "${BASH_REMATCH[3]}" "${BASH_REMATCH[4]}"
    fi
  done
}

#######################################
# Entry point: extract the column definitions from sql/$1.
# Globals:   DDL_DIR (read)
# Arguments: $1 - file name of the DDL script, relative to DDL_DIR
# Returns:   0 on success, 1 if the input is unreadable, 2 on missing argument
#######################################
main() {
  local input_file=${1-}
  local ddl_path

  if [[ -z "$input_file" ]]; then
    printf 'Usage: %s DDL_FILE  (relative to %s/)\n' "${0##*/}" "$DDL_DIR" >&2
    return 2
  fi
  ddl_path="${DDL_DIR}/${input_file}"
  if [[ ! -r "$ddl_path" ]]; then
    printf 'error: cannot read %s\n' "$ddl_path" >&2
    return 1
  fi

  extract_columns < "$ddl_path"
}

main "$@"
Sample 4: Handle backslashes in the HTML source (download linked PDFs)
#!/bin/bash
#
# Download the PDF files that an HTML page links to with Windows-style
# paths of the form href="YYYY\name.pdf".
#
# Usage: <script> HTML_FILE
#
# For every match the year and the file name are printed (one per line) and
# the file is fetched from BASE_URL/YYYY/name.pdf into a local directory
# named after the year.
#
# set -x   # uncomment to trace execution

readonly BASE_URL='http://www.people.okanagan.bc.ca/clee/bcssmc'

# Group 1: four-digit year; then one literal backslash ("\\" in the regex).
# Group 2: file name ending in a literal ".pdf"; [^"]* keeps the capture
#          inside the attribute value.
readonly PDF_HREF_RE='href="([0-9]{4})\\([^"]*\.pdf)"'

#######################################
# Print "YYYY/name.pdf" for every stdin line that links to a PDF.
# Globals:   PDF_HREF_RE (read)
# Arguments: none (reads lines from stdin)
# Outputs:   one "YYYY/name.pdf" line per matching input line on stdout
#######################################
extract_pdf_links() {
  local line
  # read -r is essential here: the backslash in the href must stay literal.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" =~ $PDF_HREF_RE ]]; then
      printf '%s/%s\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}"
    fi
  done
}

#######################################
# Entry point: list and download the PDFs linked from the file named by $1.
# Globals:   BASE_URL (read)
# Arguments: $1 - path to the HTML file
# Returns:   0 on success, 1 if the input is unreadable or a download
#            failed, 2 on missing argument
#######################################
main() {
  local input_file=${1-}
  local rel_path year pdf_name
  local status=0

  if [[ -z "$input_file" ]]; then
    printf 'Usage: %s HTML_FILE\n' "${0##*/}" >&2
    return 2
  fi
  if [[ ! -r "$input_file" ]]; then
    printf 'error: cannot read %s\n' "$input_file" >&2
    return 1
  fi

  while IFS= read -r rel_path; do
    # The year is exactly four digits, so the first "/" separates the parts.
    year=${rel_path%%/*}
    pdf_name=${rel_path#*/}
    printf '%s\n' "$year"
    printf '%s\n' "$pdf_name"

    if ! mkdir -p -- "$year"; then
      status=1
      continue
    fi
    # Subshell: the cd must not leak into later iterations. The URL is
    # quoted so file names containing spaces stay a single argument.
    if ! (cd -- "$year" && wget "${BASE_URL}/${year}/${pdf_name}"); then
      printf 'warning: download failed: %s\n' "$rel_path" >&2
      status=1
    fi
  done < <(extract_pdf_links < "$input_file")

  return "$status"
}

main "$@"