#!/usr/bin/perl
#
# Module to generate HTML reports with hyperlinks to databases. It also features subroutines to search Medline and Google.
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use HTML::TreeBuilder;
use HTML::FormatText;
# Writes a report to $out_file with one row per gene and GenBank, GeneCards
# and PubMed columns.
#
# Arguments:
#   $out_file   - path of the report file; it is created or truncated.
#   $med_search - accepted for interface compatibility; the code in this
#                 subroutine does not use it.
#   @genes      - one entry per row, either "SYMBOL" or "SYMBOL ACCESSION"
#                 (fields separated by a single space).
#
# Returns 1 on success. Dies if the file cannot be opened or closed.
# The caller's gene list is never modified.
#
# NOTE(review): the text emitted here contains no HTML tags or link URLs even
# though the report is described as HTML with hyperlinks -- confirm whether
# markup was lost from these string literals.
sub info {
    # Copying the arguments means the loop below can never write through the
    # @_ aliases into the caller's variables (or die on read-only literals).
    my ($out_file, $med_search, @genes) = @_;

    # Three-argument open with a lexical handle: the file name can no longer
    # be interpreted as an open mode, and failures are reported.
    open my $out, '>', $out_file
        or die "Cannot open '$out_file' for writing: $!";

    print {$out} "\n\nAuto-generated file with GenBank, GeneCards, and Medline automated searches\n";
    print {$out} "GenBank, GeneCards, and Medline links\n";
    print {$out} "| Gene Symbol | GenBank | GeneCards | Medline |\n";

    foreach my $entry (@genes) {
        my @fields = split / /, $entry;
        my $symbol = defined $fields[0] ? $fields[0] : '';

        # The accession is the second field when present, otherwise the whole
        # entry.
        # NOTE(review): $acc is computed but not written to the output;
        # presumably it was meant for the link targets -- verify.
        my $acc = (defined $fields[1] && $fields[1] =~ /\w/)
                ? $fields[1]
                : $entry;

        print {$out} "| $symbol | ";
        print {$out} 'GenBank | ';
        print {$out} 'GeneCards | ';
        print {$out} 'PubMed | ';
        print {$out} "\n";
    }

    print {$out} "\n\n\n";

    # Buffered write errors only surface at close, so check it.
    close $out
        or die "Cannot close '$out_file': $!";
    return 1;
}
# Returns the number of Medline (PubMed) entries reported for a query term.
#
# Arguments:
#   $_[0] - the query; each run of whitespace is replaced by "+" before the
#           term is appended to the search URL. No other escaping is done.
#
# The result page is rendered to plain text and scanned for a line of the
# form "Item 1 of N"; N from the last such line is returned. Returns 0 when
# the page cannot be fetched or no such line is found.
sub medline_count {
    my $query = $_[0];
    $query =~ s/\s+/\+/g;
    my $URL = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&tool=arct&db=pubmed&dispmax=1&doptcmdl=uilist&term=' . $query;

    # LWP::Simple::get returns undef on failure; there is nothing to parse.
    my $retrieve = get($URL);
    return 0 unless defined $retrieve;

    my $tree      = HTML::TreeBuilder->new_from_content($retrieve);
    my $formatter = HTML::FormatText->new(leftmargin => 0, rightmargin => 50);
    my $text      = $formatter->format($tree);

    # Release the parse tree explicitly, as HTML::TreeBuilder recommends.
    $tree->delete;

    my $count = 0;
    foreach my $line (split /\n/, $text) {
        $count = $1 if $line =~ /Item 1 of (\d+)/;
    }
    return $count;
}
# For a given query, google_count returns the number of hits reported by
# Google plus the spelling correction suggested by Google, if any.
#
# Arguments:
#   $_[0] - the query; each run of whitespace is replaced by "+" before the
#           term is appended to the search URL. No other escaping is done.
#
# Returns the list ($count, $correction). $count is 0 and $correction is the
# empty string when the request fails or the page has no matching lines.
#
# Example:
# print google_count("aging"),"\n",google_count("aghing");
sub google_count {
    my $query = $_[0];
    $query =~ s/\s+/\+/g;
    my $URL = 'http://www.google.com/custom?q=' . $query;

    my $ua = LWP::UserAgent->new;
    $ua->agent('Mozilla/4.0');
    my $res = $ua->request(HTTP::Request->new(GET => $URL));

    # A failed request only carries an error page; report "nothing found".
    return (0, "") unless $res->is_success;

    my $tree      = HTML::TreeBuilder->new_from_content($res->content);
    my $formatter = HTML::FormatText->new(leftmargin => 0, rightmargin => 50);
    my $text      = $formatter->format($tree);

    # Release the parse tree explicitly, as HTML::TreeBuilder recommends.
    $tree->delete;

    my $count      = 0;
    my $correction = "";
    foreach my $line (split /\n/, $text) {
        # [\d,]+ takes the whole figure, however many thousands separators
        # it has ("1,234,567"), not just the first two digit groups.
        if ($line =~ /Results 1 - 10 of about ([\d,]+)/) {
            $count = $1;
        }
        elsif ($line =~ /Results 1 - (\d+)/) {
            $count = $1;
        }
        elsif ($line =~ /Did you mean: (.*)/) {
            $correction = $1;
        }
    }
    $count =~ s/,//g;
    return ($count, $correction);
}
# Looks up a SWISS-PROT variant and returns the word captured after the
# "polymorphism or unclassified)" table cell of its ExPASy page.
#
# Arguments:
#   $var - the variant number, e.g. "018942". Shorter numbers are left-padded
#          with zeros to six digits. A leading "VAR_" is also accepted and
#          stripped, so the prefix never appears twice in the URL.
#
# Returns "Unknown" when the page cannot be fetched or the expected cell is
# not found.
#
# Example:
# print var_type("018942");
#
sub var_type {
    my ($var) = @_;
    my $type = "Unknown"; # Default

    $var = '' unless defined $var;
    $var =~ s/\AVAR_//i;

    # Left-pad to six digits; a non-positive repeat count yields ''.
    $var = ('0' x (6 - length $var)) . $var;

    my $URL = 'http://au.expasy.org/cgi-bin/get-sprot-variant.pl?VAR_' . $var;

    # LWP::Simple::get returns undef on failure; keep the default then.
    my $retrieve = get($URL);
    return $type unless defined $retrieve;

    if ($retrieve =~ /polymorphism or unclassified\)<\/td>\s+(\w+)<\/td>/) {
        $type = $1;
    }
    return $type;
}
1;