|
# Fetch an HTML page, extract the selected table cells with ExtractTable(),
# strip markup from each extracted row and write the non-empty results to
# STDOUT and (when an output file is given) to that file.
#
# Command-line arguments, in order:
#   url         page to fetch
#   table_no    table index passed to ExtractTable (-1 selects every table)
#   row_no      row index passed to ExtractTable (-1 selects every row)
#   column_no   column index passed to ExtractTable (-1 selects every column)
#   overwrite   "y" truncates the output file, anything else appends
#   outputfile  file that receives a copy of the printed rows
if ($#ARGV < 0)
{
    # $prog is not set anywhere in this part of the file; fall back to $0.
    my $script_name = defined $prog ? $prog : $0;
    die "\nUsage: $script_name url table_no row_no column_no overwrite(y/n) outputfile\n";
}

my ($url, $table_no, $row_no, $column_no, $overwrite, $outputfile) = @ARGV;

my $ua = LWP::UserAgent->new(
    keep_alive => 1,
    timeout    => 300,
);

# Resolve this machine's IP address through OLE so we can decide whether
# the HTTP proxy is needed.
my $oResolve = OLE->CreateObject("WebResolve.WebResolve")
    or die "Cannot create OLE object WebResolve.WebResolve\n";
my $oNetwork      = OLE->CreateObject("Wscript.Network");
my $sComputerName = $oNetwork->{ComputerName};
$oResolve->{host} = $sComputerName;
my $machineIP = $oResolve->Resolve;

my $UseProxy         = 0;            # currently unused
my $IP_proxyprefix   = "155.69";
my $IP_noproxyprefix = "192.168";    # currently unused

# \Q...\E keeps the dots in the prefix literal; unescaped, "155.69" would
# also match e.g. "155x69".
if (defined $machineIP && $machineIP =~ /^\Q$IP_proxyprefix\E/)
{
    $ua->proxy('http', 'http://proxy1.ntu.edu.sg:8080');
}

# Fetch before touching the output file so that a failed request cannot
# truncate an existing file in overwrite mode.
my $response = $ua->get($url);
$response->is_success
    or die "Cannot fetch $url: " . $response->status_line . "\n";
my $html = $response->content;

# The output file is optional: without one the rows only go to STDOUT.
my $out;
if (defined $outputfile && $outputfile ne "")
{
    my $mode = (defined $overwrite && $overwrite eq "y") ? '>' : '>>';
    open($out, $mode, $outputfile)
        or die "Cannot open $outputfile for writing: $!\n";
}

my @tblcells = ExtractTable($html, $table_no, $row_no, $column_no);
foreach my $tblcell (@tblcells)
{
    # Strip markup: opening tags with attributes, bare opening tags, then
    # closing tags.  (The first substitution used to leave a literal "<>"
    # behind for every tag that carried attributes.)
    $tblcell =~ s/<\w+ [^>]*>//g;
    $tblcell =~ s/<\w+>//g;
    $tblcell =~ s/<\/\w+>//g;

    # Normalise whitespace and decode the entities we care about.
    # NOTE(review): the previous patterns were s/ // (which removed every
    # space) and s/&/&/ (a no-op); they look like entity-decoded forms of
    # "&nbsp;" and "&amp;" -- confirm against the original script.
    $tblcell =~ s/ +/ /g;
    $tblcell =~ s/\t//g;
    $tblcell =~ s/&nbsp;//gi;
    $tblcell =~ s/&amp;/&/gi;
    $tblcell =~ s/^ +//;
    $tblcell =~ s/ +$//;

    if ($tblcell ne "")
    {
        print "$tblcell\n";
        print {$out} "$tblcell\n" if $out;
    }
}

if ($out)
{
    # Buffered write errors only surface at close time.
    close($out) or die "Cannot close $outputfile: $!\n";
}
# ExtractTable($html, $table_no, $row_no, $column_no)
#
# Parses $html with HTML::TableContentParser and returns a list with one
# string per selected row.  Each string holds the "data" of the selected
# cells of that row joined with "|".
#
# Tables, rows and columns are counted from 0.  Passing -1 for a selector
# selects every table / row / column at that level.  A selected row that
# has no selected column still contributes an empty string to the result.
#
# Note: the sub is deliberately declared without a prototype.  It used to
# be declared with "()", which describes a sub taking no arguments and only
# worked because the call site preceded the definition.
sub ExtractTable
{
    my ($inbuf, $SelTable, $SelRow, $SelCol) = @_;
    my @outarray;

    my $parser = HTML::TableContentParser->new();
    my $tables = $parser->parse($inbuf);

    my $iTblNo = 0;
    for my $table (@$tables)
    {
        if (($SelTable == -1) || ($iTblNo == $SelTable))
        {
            my $iRwNo = 0;
            for my $row (@{ $table->{rows} })
            {
                if (($SelRow == -1) || ($iRwNo == $SelRow))
                {
                    my @selected;
                    my $iColNo = 0;
                    for my $cell (@{ $row->{cells} })
                    {
                        if (($SelCol == -1) || ($iColNo == $SelCol))
                        {
                            # A cell without data contributes an empty field.
                            push @selected,
                                defined $cell->{data} ? $cell->{data} : "";
                        }
                        $iColNo++;
                    }
                    push @outarray, join("|", @selected);
                }
                $iRwNo++;
            }
        }
        $iTblNo++;
    }

    return @outarray;
}
|
|