Software >> Development >> Languages >> Perl >> How to download a specific table, row, or column of a table from a website

# Fetch a web page and print selected cells of its HTML tables.
#
# Usage: <script> <url> <table-no> <row-no> <column-no> <overwrite> <outputfile>
#
#   url         page to fetch
#   table-no    zero-based index of the table to extract, or -1 for every table
#   row-no      zero-based index of the row to extract, or -1 for every row
#   column-no   zero-based index of the column to extract, or -1 for every column
#   overwrite   "y" truncates the output file; any other value appends to it
#   outputfile  file that receives a copy of every printed row
#
# Only <url> is mandatory. Omitted selectors default to 0, and when no output
# file is given the rows are only printed to STDOUT.
use strict;
use warnings;

use File::Basename qw(basename);

# Nothing is imported from these modules (only class methods are called), so
# they are simply loaded at run time before first use.
require LWP::UserAgent;
require HTML::TableContentParser;
require OLE;

my $prog = basename($0);
die "\nUsage: $prog <url> <table-no> <row-no> <column-no> <overwrite> <outputfile>\n"
    if @ARGV < 1;

my ($url, $table_no, $row_no, $column_no, $overwrite, $outputfile) = @ARGV;
for my $selector ($table_no, $row_no, $column_no) {
    $selector = 0 unless defined $selector;
}
$overwrite = '' unless defined $overwrite;

my $ua = LWP::UserAgent->new(
    keep_alive => 1,
    timeout    => 300,
);

# Hosts whose address starts with this prefix send HTTP requests through the
# proxy below; every other host connects directly.
my $proxy_ip_prefix = '155.69';
my $proxy_url       = 'http://proxy1.ntu.edu.sg:8080';

# Look up this machine's own address: Wscript.Network supplies the computer
# name and WebResolve.WebResolve resolves that name.
my $resolver = OLE->CreateObject('WebResolve.WebResolve')
    or die "Cannot create COM object WebResolve.WebResolve\n";
my $network = OLE->CreateObject('Wscript.Network')
    or die "Cannot create COM object Wscript.Network\n";
$resolver->{host} = $network->{ComputerName};
my $machine_ip = $resolver->Resolve;

# \Q..\E keeps the dots in the prefix literal instead of "any character".
if (defined $machine_ip && $machine_ip =~ /^\Q$proxy_ip_prefix\E\./) {
    $ua->proxy('http', $proxy_url);
}

# Fail before touching the output file so a failed download can never
# truncate previously saved results.
my $response = $ua->get($url);
die "Cannot fetch $url: " . $response->status_line . "\n"
    unless $response->is_success;
my $html = $response->content;

my $out;
if (defined $outputfile && $outputfile ne '') {
    my $mode = $overwrite eq 'y' ? '>' : '>>';
    open $out, $mode, $outputfile
        or die "Cannot open $outputfile: $!\n";
}

for my $row (ExtractTable($html, $table_no, $row_no, $column_no)) {
    my $text = StripMarkup($row);
    next if $text eq '';

    print "$text\n";
    print {$out} "$text\n" if $out;
}

if ($out) {
    # Buffered write errors only surface when the handle is closed.
    close $out or die "Cannot close $outputfile: $!\n";
}

# StripMarkup($text)
#
# Returns $text with HTML tags removed, runs of spaces collapsed to a single
# space, tabs and &nbsp; entities dropped, &amp; decoded to "&", and leading
# and trailing spaces trimmed.
sub StripMarkup {
    my ($text) = @_;

    # Opening, closing and self-closing tags, with or without attributes.
    $text =~ s{</?\w+[^>]*>}{}g;

    $text =~ s/ +/ /g;
    $text =~ s/\t//g;
    $text =~ s/&nbsp;//gi;
    $text =~ s/&amp;/&/gi;
    $text =~ s/^ +//;
    $text =~ s/ +$//;

    return $text;
}

# ExtractTable($html, $table_no, $row_no, $column_no)
#
# Parses $html with HTML::TableContentParser and returns a list with one
# string per selected row. Each string holds the raw data of the selected
# cells of that row joined with "|". Tables, rows and columns are counted
# from 0; passing -1 for a selector selects all of them. A selected row with
# no selected cell contributes an empty string.
sub ExtractTable {
    my ($html, $sel_table, $sel_row, $sel_col) = @_;

    my $tables = HTML::TableContentParser->new()->parse($html);

    my @selected_rows;
    my $table_idx = 0;
    for my $table (@{ $tables || [] }) {
        my $this_table = $table_idx++;
        next unless $sel_table == -1 || $this_table == $sel_table;

        my $row_idx = 0;
        for my $row (@{ $table->{rows} || [] }) {
            my $this_row = $row_idx++;
            next unless $sel_row == -1 || $this_row == $sel_row;

            my @cells;
            my $col_idx = 0;
            for my $cell (@{ $row->{cells} || [] }) {
                my $this_col = $col_idx++;
                next unless $sel_col == -1 || $this_col == $sel_col;

                # An empty cell may carry no data at all; treat it as "".
                push @cells, defined $cell->{data} ? $cell->{data} : '';
            }

            push @selected_rows, join('|', @cells);
        }
    }

    return @selected_rows;
}