-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathgfffilter.pl
executable file
·76 lines (60 loc) · 2.17 KB
/
gfffilter.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/perl
@fields = qw(seqname source feature start end score strand frame group);
$usage .= "$0 - filter GFF files\n";
$usage .= "\n";
$usage .= "Usage: $0 [-not] [-near <distance>] <filter expression> [<GFF files>]\n";
$usage .= "\n";
$usage .= "Only allows through GFF's that match the filter expression (or not, if -not switch selected).\n";
$usage .= "Use (".join(" ",map("\$gff$_",@fields)).") to refer to GFF fields, e.g. '\$gfffeature eq \"gene\"'.\n";
$usage .= "Alternatively you can use (".join(" ",map("\$$_",@fields))."), e.g. '\$feature eq \"gene\"'.\n";
$usage .= "Use -near to let through (or not let through) all GFFs that are closer than a certain distance to matching GFFs.\n";
$usage .= "\n";
while (@ARGV) {
last unless $ARGV[0] =~ /^-/;
$opt = lc shift;
if ($opt eq "-not") { $not = 1 }
elsif ($opt eq "-near") { defined($near = shift) or die $usage }
else { die "$usage\nUnknown option: $opt\n" }
}
@ARGV>=1 or die $usage;
$filter = shift;
for ($i=0;$i<@fields;$i++) {
$filter =~ s/\$$fields[$i]/\$gff->[$i]/g;
$filter =~ s/\$gff$fields[$i]/\$gff->[$i]/g;
}
while (<>) {
s/#.*//;
next unless /\S/;
chomp;
$gff = [split(/\t/,$_,9)];
($seqname,$start,$end) = @{$gff}[0,3,4];
if (!defined $near) {
if ($not) { print "$_\n" unless eval $filter }
else { print "$_\n" if eval $filter }
} else {
if ($seqname ne $lastseqname || (defined($lasthit) && $start > $lasthit + $near)) {
printbuffer();
@buffer = ();
undef $firsthit;
undef $lasthit;
$lastseqname = $seqname;
}
if (eval $filter) {
$firsthit = $start unless defined $firsthit;
$lasthit = $end unless $lasthit > $end;
}
for ($i=0;$i<@buffer;$i++) { last if ($buffer[$i]->[4] > $end) }
splice @buffer, $i, 0, $gff;
$cursor = defined($firsthit) ? $firsthit : $start;
while (@buffer && $buffer[0]->[4] < $cursor - $near) {
$deadgff = shift @buffer;
if ($not) { print join("\t",@$deadgff)."\n" }
}
}
}
if (defined $near) { printbuffer() }
sub printbuffer {
return unless ($not && !defined($lasthit)) || (!$not && defined($lasthit));
my $gff;
foreach $gff (@buffer) { print join("\t",@$gff)."\n" }
}