#!/usr/bin/perl
#
# Module to generate HTML reports with hyperlinks to databases. It also
# features subroutines to search Medline and Google.

use strict;
use warnings;

use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use HTML::TreeBuilder;
use HTML::FormatText;

# Private helper: make a free-text query safe to append to a URL.
# Runs of whitespace become a single '+', as the search subroutines have
# always done. Every other character that is not URL-safe is percent-encoded,
# so that '&', '#', '"' and friends can no longer truncate or corrupt the
# request. '+' and '%' are deliberately passed through untouched so that
# callers who already hand in encoded queries keep working.
sub _url_encode_query {
    my ($query) = @_;
    return '' unless defined $query;

    # Encode wide characters to UTF-8 bytes before percent-encoding them.
    utf8::encode($query) if utf8::is_utf8($query);

    $query =~ s/\s+/+/g;
    $query =~ s/([^A-Za-z0-9\-_.~+%])/sprintf('%%%02X', ord $1)/ge;
    return $query;
}

# Private helper: escape the characters that are special in HTML text and
# attribute values.
sub _html_escape {
    my ($text) = @_;
    return '' unless defined $text;

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Private helper: render an HTML document as plain text (50-column lines)
# and return it as a list of lines.
sub _html_to_lines {
    my ($html) = @_;

    my $tree      = HTML::TreeBuilder->new_from_content($html);
    my $formatter = HTML::FormatText->new(leftmargin => 0, rightmargin => 50);
    my $text      = $formatter->format($tree);

    # Parse trees contain circular references; delete them explicitly so
    # repeated searches do not leak memory.
    $tree->delete;

    return split /\n/, $text;
}

# This subroutine creates outputs as an HTML file with hyperlinks to GenBank,
# GeneCards, and Medline.
#
# Usage: info($out_file, $med_search, @genes);
#
#   $out_file   - path of the report to write (created or overwritten).
#   $med_search - optional extra search term(s) for the Medline link.
#   @genes      - one entry per gene, either "SYMBOL" or "SYMBOL ACCESSION".
#                 When no accession is given, the symbol is used for the
#                 GenBank link as well.
#
# Dies if the output file cannot be opened or closed. Returns 1 on success.
# The caller's list is never modified (the previous implementation rewrote
# each element to the bare gene symbol through @_ aliasing).
#
# NOTE(review): the copy of this file under review had the HTML markup and
# link targets stripped from its string literals. The table layout below is
# rebuilt from the surviving text (title, column headings, link labels). The
# GenBank and GeneCards URLs, and the way $med_search is combined with the
# gene symbol in the Medline link, are assumptions -- confirm them against
# the original before release.
sub info {
    my ($out_file, $med_search, @genes) = @_;

    my $genbank_url   = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=nucleotide&term=';
    my $genecards_url = 'http://www.genecards.org/cgi-bin/carddisp.pl?gene=';
    my $pubmed_url    = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=';

    open my $out, '>', $out_file
        or die "Cannot open '$out_file' for writing: $!";

    print {$out} "<html>\n";
    print {$out} "<!-- Auto-generated file with GenBank, GeneCards, and Medline automated searches -->\n";
    print {$out} "<head>\n<title>GenBank, GeneCards, and Medline links</title>\n</head>\n";
    print {$out} "<body>\n<table border=\"1\">\n";
    print {$out} '<tr><th>Gene Symbol</th><th>GenBank</th><th>GeneCards</th><th>Medline</th></tr>', "\n";

    for my $entry (@genes) {
        next unless defined $entry;

        # "SYMBOL ACCESSION"; tolerate extra or leading whitespace.
        my ($symbol, $accession) = split ' ', $entry;
        next unless defined $symbol;
        $accession = $symbol
            unless defined $accession && $accession =~ /\w/;

        my $pubmed_term = $symbol;
        $pubmed_term .= " $med_search"
            if defined $med_search && length $med_search;

        my @links = (
            [ $genbank_url . _url_encode_query($accession),  'GenBank'   ],
            [ $genecards_url . _url_encode_query($symbol),   'GeneCards' ],
            [ $pubmed_url . _url_encode_query($pubmed_term), 'PubMed'    ],
        );

        print {$out} '<tr><td>', _html_escape($symbol), '</td>';
        for my $link (@links) {
            my ($href, $label) = @{$link};
            print {$out} '<td><a href="', _html_escape($href), '">', $label, '</a></td>';
        }
        print {$out} "</tr>\n";
    }

    print {$out} "</table>\n</body>\n</html>\n\n";

    # Buffered write errors only surface at close time.
    close $out
        or die "Cannot close '$out_file': $!";

    return 1;
}

# Subroutine to check the number of entries in Medline for a given query term.
#
#   $_[0] - the query; whitespace separates terms.
#
# Returns the number of entries reported by the result page, or 0 when the
# page cannot be retrieved or reports no count.
sub medline_count {
    my $query = _url_encode_query($_[0]);
    my $URL   = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&tool=arct&db=pubmed&dispmax=1&doptcmdl=uilist&term=' . $query;

    # LWP::Simple::get returns undef on any failure.
    my $retrieve = get($URL);
    return 0 unless defined $retrieve;

    my $count = 0;
    for my $line (_html_to_lines($retrieve)) {
        $count = $1 if $line =~ /Item 1 of (\d+)/;
    }
    return $count;
}

# For a given $query, google_count returns the number of hits in Google plus
# the correction suggested by Google, if any.
# Example:
#   print google_count("aging"),"\n",google_count("aghing");
#
# Returns the list (count, correction). The count is 0 and the correction is
# the empty string when the request fails or the page has no such line.
sub google_count {
    my $query = _url_encode_query($_[0]);
    my $URL   = 'http://www.google.com/custom?q=' . $query;

    my $ua = LWP::UserAgent->new;
    $ua->agent('Mozilla/4.0');
    my $res = $ua->request(HTTP::Request->new(GET => $URL));

    my $count      = 0;
    my $correction = "";

    # Do not try to read hit counts out of an error page.
    return ($count, $correction) unless $res->is_success;

    for my $line (_html_to_lines($res->content)) {
        # [\d,]+ accepts any number of thousands groups; the previous
        # pattern stopped after the second group, so "1,234,567" was
        # reported as 1234.
        if ($line =~ /Results 1 - 10 of about ([\d,]+)/) {
            $count = $1;
        }
        elsif ($line =~ /Results 1 - (\d+)/) {
            $count = $1;
        }
        elsif ($line =~ /Did you mean: (.*)/) {
            $correction = $1;
        }
    }
    $count =~ s/,//g;

    return ($count, $correction);
}

# Subroutine to check if a given SWISS-PROT variant is a polymorphism or a
# disease. Make sure you use only numbers and not the "VAR_" characters in
# $var.
# Example:
#   print var_type("018942");
#
# Argument: the variant number, with or without leading zeros. A leading
# "VAR_" is tolerated and removed.
# Returns the word found in the variant-type cell of the report page, or
# "Unknown" when the page cannot be retrieved or does not contain that cell.
sub var_type {
    my ($var) = @_;
    my $type = "Unknown";    # Default

    $var = '' unless defined $var;

    # The URL below already supplies the "VAR_" prefix; drop a duplicate
    # instead of silently requesting "VAR_VAR_...".
    $var =~ s/\AVAR_//i;

    # Make sure $var has leading zeros
    $var = ('0' x (6 - length $var)) . $var if length($var) < 6;

    my $URL = 'http://au.expasy.org/cgi-bin/get-sprot-variant.pl?VAR_' . $var;

    # LWP::Simple::get returns undef on failure; keep the default then.
    my $retrieve = get($URL);
    return $type unless defined $retrieve;

    # NOTE(review): the pattern in the reviewed copy had no opening <td>
    # between the label cell and the value; it may have been lost together
    # with other markup in this file. The optional group accepts both forms.
    if ($retrieve =~ /polymorphism or unclassified\)<\/td>\s*(?:<td[^>]*>\s*)?(\w+)\s*<\/td>/) {
        $type = $1;
    }
    return $type;
}

1;