#!/usr/local/bin/perl
use strict;
use warnings;
# Split a FASTA file into one output file per group.
#
# Usage: <this script> <fasta_file> <output_dir>
#
# Each record header is expected to consist of four dash-separated fields
# whose second field is all digits. That numeric field (called $tax /
# $lineage below) selects the output file "<output_dir>/<field>_pep.fa".
# Records are collected in memory first and then appended to their files.
my $seqfile = $ARGV[0];
my $outdir = $ARGV[1];
die "Usage: $0 <fasta_file> <output_dir>\n"
    unless defined $seqfile && defined $outdir;

# $refseq{$tax}{$header} = sequence string; a repeated header within the
# same group overwrites the earlier entry.
my %refseq;
open(my $IFIL, '<', $seqfile) or die "can't open file $seqfile: $!\n";
my %sequence_data;
while (read_fasta_sequence($IFIL, \%sequence_data)) {
    my $header = $sequence_data{header};
    my $seq = $sequence_data{seq};
    #print "$header\n";

    # Only trust $1 after a successful match: a failed match does not give
    # $1 a fresh value, so using it unconditionally could file the record
    # under the wrong group or under an empty key.
    if (defined $header && $header =~ /^[^-]+-(\d+)-[^-]+-[^-]+$/) {
        my $tax = $1;
        $refseq{$tax}{$header} = $seq;
    }
    else {
        my $shown = defined $header ? $header : '(no header)';
        warn "Skipping record with unrecognised header: $shown\n";
    }
}
close $IFIL;

# Sorted iteration makes the output order reproducible between runs.
foreach my $lineage (sort keys %refseq) {
    my $outfile = "${outdir}/${lineage}_pep.fa";

    # Append mode is kept from the original: existing files are extended,
    # not truncated.
    open(my $OFIL, '>>', $outfile) or die "Couldn't write to file $outfile: $!\n";
    foreach my $header (sort keys %{ $refseq{$lineage} }) {
        my $seq = $refseq{$lineage}{$header};
        $seq = '' unless defined $seq;    # header-only record: empty sequence line
        print {$OFIL} ">${header}\n$seq\n";
    }

    # Buffered write errors only surface when the handle is closed.
    close($OFIL) or die "Couldn't close file $outfile: $!\n";
}
# Read the next FASTA record from an open filehandle.
#
# Arguments:
#   $IFIL     - filehandle opened for reading on FASTA-formatted text
#   $seq_info - hash reference that is reused between calls; it carries the
#               result and the look-ahead state:
#                 header      - header of the record just read, without the
#                               leading '>'
#                 seq         - its sequence lines joined together with all
#                               white space removed (undef if the record had
#                               no sequence lines)
#                 next_header - header already consumed from the stream that
#                               belongs to the NEXT record
#
# Returns $seq_info while a record is available. At end of input all three
# fields are reset to undef and an empty return is made, so the sub can be
# used directly as a while-loop condition.
#
# Blank lines are skipped. Sequence lines that appear before the first
# header are attached to the first record, as before.
sub read_fasta_sequence {
    my ($IFIL, $seq_info) = @_;

    $seq_info->{seq} = undef;    # clear out previous sequence

    # Promote the header that the previous call read ahead, and clear the
    # look-ahead slot so that a stale value cannot be mistaken for a pending
    # record once the input is exhausted.
    my $have_record = 0;
    if (defined $seq_info->{next_header}) {
        $seq_info->{header}      = $seq_info->{next_header};
        $seq_info->{next_header} = undef;
        $have_record             = 1;
    }
    else {
        $seq_info->{header} = undef;
    }

    # A lexical line variable keeps the caller's $_ untouched.
    while (my $line = <$IFIL>) {
        next if $line =~ /^\s*$/;    # skip blank lines
        chomp $line;
        $line =~ s/\r\z//;           # drop the CR left behind by CRLF line endings

        if ($line =~ /^>/) {         # fasta header line
            (my $h = $line) =~ s/^>//;

            # defined(), not truthiness: a header of "0" or "" is still a header.
            if (defined $seq_info->{header}) {
                # This header starts the next record; remember it and hand
                # back the record collected so far.
                $seq_info->{next_header} = $h;
                return $seq_info;
            }

            # first header of the stream
            $seq_info->{header} = $h;
            $have_record = 1;
        }
        else {
            $line =~ s/\s+//g;       # remove ALL white space, not just the first run
            $seq_info->{seq} .= $line;
            $have_record = 1;
        }
    }

    # End of input: return the final record if one was pending or collected
    # (this includes a trailing header that has no sequence lines).
    return $seq_info if $have_record;

    # Nothing left: clean everything up and signal the end.
    $seq_info->{header} = $seq_info->{seq} = $seq_info->{next_header} = undef;
    return;
}