From faa8581da704b05e09f07b2b2458201f3c6b9c56 Mon Sep 17 00:00:00 2001 From: anna-parker Date: Wed, 10 Aug 2022 18:02:53 +0200 Subject: [PATCH 01/12] allow for multiple trees in arg --- treetime/arg.py | 160 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 119 insertions(+), 41 deletions(-) diff --git a/treetime/arg.py b/treetime/arg.py index 3d82008b..a33651dc 100644 --- a/treetime/arg.py +++ b/treetime/arg.py @@ -1,15 +1,52 @@ from matplotlib.pyplot import fill import numpy as np - -def parse_arg(tree1, tree2, aln1, aln2, MCC_file, fill_overhangs=True): +import json +import itertools + +def get_tree_names(tree_nwk_files): + tree_names = [] + for file in tree_nwk_files: + file_name = file.split("/")[-1].split(".")[0] + file_name = file_name.replace("_resolved", "").replace("resolved", "") + tree_names.append(file_name) + if len(set(tree_names)) != len(tree_nwk_files): + #tree name identifiers are not unique + raise Exception("Error: Tree names must be unique, see TreeKnit output format.") + return tree_names + +def get_MCC_dict(MCC_file): + f = open("test.json") + data = json.load(f) + MCC_dict = {} + for key in data["MCC_dict"]: + MCC_dict[frozenset(data["MCC_dict"][key]["trees"])] = data["MCC_dict"][key]["mccs"] + + return MCC_dict + +def get_mask(length_segments, tree_names): + pos_list = [0] + for l in length_segments: + new = pos_list[-1] + l + pos_list.append(new) + mask = {} + no_trees = len(tree_names) + for r in range(1,no_trees): + combos = itertools.combinations(range(no_trees), r) + for comb in combos: + new_mask = np.zeros(sum(length_segments)) + for c in comb: + new_mask[pos_list[c-1]:pos_list[c]] = 1 + mask[frozenset(tree_names[comb])] = new_mask + return mask + +def parse_arg(tree_files, aln_files, MCC_file, fill_overhangs=True): +#def parse_arg(tree_file_dir, aln_file_dir, MCC_file, tree_names, fill_overhangs=True): """parse the output of TreeKnit and return a file structure to be further consumed by TreeTime Args: - tree1 (str): 
file name of tree1 - tree2 (str): file name of tree2 - aln1 (str): file name of alignment 1 - aln2 (str): file name of alignment 2 + tree_files (str): file names of trees + aln_files (str): file names of alignments MUST be in the same order as tree_files MCC_file (str): name of mcc file fill_overhangs (bool, optional): fill terminal gaps of alignmens before concatenating. Defaults to True. @@ -20,22 +57,21 @@ def parse_arg(tree1, tree2, aln1, aln2, MCC_file, fill_overhangs=True): from Bio.Align import MultipleSeqAlignment from treetime.seq_utils import seq2array - # read trees and determine common terminal nodes - t1 = Phylo.read(tree1, 'newick') - t2 = Phylo.read(tree2, 'newick') - all_leaves = set.intersection(set([x.name for x in t1.get_terminals()]), set([x.name for x in t2.get_terminals()])) - # read MCCs as lists of taxon names - MCCs = [] - with open(MCC_file) as fh: - for line in fh: - if line.strip(): - MCCs.append(line.strip().split(',')) + MCC_dict = get_MCC_dict(MCC_file) + + # read trees, assert trees are named consistently + tree_names = get_tree_names(tree_files) + trees_in_dict = set().union(*MCC_dict.keys()) + assert(all([k in trees_in_dict for k in tree_names])) + trees = [Phylo.read(t, 'newick') for t in tree_files] + + # determine common terminal nodes + all_leaves = set.intersection(*[set([x.name for x in t.get_terminals()]) for t in trees]) # read alignments and construct edge modified sequence arrays - a1 = {s.id:s for s in AlignIO.read(aln1, 'fasta')} - a2 = {s.id:s for s in AlignIO.read(aln2, 'fasta')} - for aln in [a1,a2]: + alignments = [{s.id:s for s in AlignIO.read(aln, 'fasta')} for aln in aln_files] + for aln in alignments: for s,seq in aln.items(): seqstr = "".join(seq2array(seq, fill_overhangs=fill_overhangs)) seq.seq = Seq.Seq(seqstr) @@ -43,23 +79,21 @@ def parse_arg(tree1, tree2, aln1, aln2, MCC_file, fill_overhangs=True): # construct concatenated alignment aln_combined = [] for leaf in all_leaves: - seq = a1[leaf] + a2[leaf] + 
concat_seq = [] + for a in alignments: + concat_seq += a[leaf] + seq = [concat_seq] seq.id = leaf aln_combined.append(seq) # construct masks for the concatenation and the two segments - l1 = len(a1[leaf]) - l2 = len(a2[leaf]) - combined_mask = np.ones(l1 + l2) - mask1 = np.zeros(l1 + l2) - mask2 = np.zeros(l1 + l2) - mask1[:l1] = 1 - mask2[l1:] = 1 - - return {"MCCs": MCCs, "trees":[t1,t2], "alignment":MultipleSeqAlignment(aln_combined), - "masks":[mask1,mask2], "combined_mask":combined_mask} - -def setup_arg(T, aln, total_mask, segment_mask, dates, MCCs, gtr='JC69', + l = [len(a[leaf]) for a in alignments] + masks = get_mask(l, tree_names) + + return {"MCCs": MCC_dict, "trees":trees, "alignment":MultipleSeqAlignment(aln_combined), + "masks":masks} + +def setup_arg(T, aln, dates, MCCs, masks, gtr='JC69', verbose=0, fill_overhangs=True, reroot=True, fixed_clock_rate=None, alphabet='nuc', **kwargs): """construct a TreeTime object with the appropriate masks on each node for branch length optimization with full or segment only alignment. 
@@ -91,19 +125,16 @@ def setup_arg(T, aln, total_mask, segment_mask, dates, MCCs, gtr='JC69', tt.reroot("least-squares", force_positive=True, clock_rate=fixed_clock_rate) # make a lookup for the MCCs and assign to tree - leaf_to_MCC = {} - for mi,mcc in enumerate(MCCs): - for leaf in mcc: - leaf_to_MCC[leaf] = mi + leaf_to_MCC = get_mcc_map(MCCs) - assign_mccs(tt.tree, leaf_to_MCC, tt.one_mutation) + assign_all_mccs(tt.tree, len(MCCs), leaf_to_MCC, tt.one_mutation) # assign masks to branches whenever child and parent are in the same MCC for n in tt.tree.find_clades(): - if (n.mcc is not None) and n.up and n.up.mcc==n.mcc: - n.mask = total_mask - else: - n.mask = segment_mask + shared = [(n.mcc[pos] is not None) and n.up and n.up.mcc[pos]==n.mcc[pos] for pos in len(MCCs)] + pos_shared = frozenset([i for i, x in enumerate(shared) if x]) + n.mask = masks[pos_shared] + return tt @@ -147,3 +178,50 @@ def assign_mccs(tree, mcc_map, one_mutation=1e-4): n.mcc = list(n.child_mccs)[0] else: # no unique child MCC and no match with parent -> not part of an MCCs n.mcc = None + +def get_mcc_map(MCCs_list): + # make a lookup for the MCCs and assign to trees + leaf_to_MCC = {} + for MCCs in MCCs_list: + for mi,mcc in enumerate(MCCs): + for leaf in mcc: + if leaf not in leaf_to_MCC: + leaf_to_MCC[leaf] = [mi] + else: + leaf_to_MCC[leaf].append(mi) + return leaf_to_MCC + +def assign_all_mccs(tree, len_tree_list, mcc_map, one_mutation=1e-4): + for leaf in tree.get_terminals(): + leaf.child_mccs = [set([mcc_map[leaf.name][pos]]) for pos in range(len_tree_list)] + leaf.mcc = mcc_map[leaf.name] + leaf.branch_length = max(0.5*one_mutation, leaf.branch_length) + # reconstruct MCCs with Fitch algorithm + for n in tree.get_nonterminals(order='postorder'): + common_mccs = [set.intersection(*[c.child_mccs[pos] for c in n]) for pos in range(len_tree_list)] + n.branch_length = max(0.5*one_mutation, n.branch_length) + n.child_mccs = [] + for pos in range(len_tree_list): + if 
len(common_mccs[pos]): + n.child_mccs.append(common_mccs[pos]) + else: + n.child_mccs.append(set.union(*[c.child_mccs[pos] for c in n])) + mcc_intersection = [set.intersection(*[c.child_mccs[pos] for c in tree.root]) for pos in range(len_tree_list)] + tree.root.mcc = [] + for pos in range(len_tree_list): + if len(mcc_intersection[pos]): + tree.root.mcc.append(list(mcc_intersection[pos])[0]) + else: + tree.root.mcc.append(None) + for n in tree.get_nonterminals(order='preorder'): + if n==tree.root: + continue + else: + n.mcc = [] + for pos in range(len_tree_list): + if n.up.mcc[pos] in n.child_mccs[pos]: # parent MCC part of children -> that is the MCC + n.mcc.append(n.up.mcc[pos]) + elif len(n.child_mccs[pos])==1: # child is an MCC + n.mcc.append(list(n.child_mccs[pos])[0]) + else: # no unique child MCC and no match with parent -> not part of an MCCs + n.mcc.append(None) \ No newline at end of file From dbb1c7e94172229d3687a2140531b96737bfab2c Mon Sep 17 00:00:00 2001 From: anna-parker Date: Thu, 11 Aug 2022 17:37:36 +0200 Subject: [PATCH 02/12] add arg tests --- test/arg/TreeKnit/MCCs.json | 17 +++ test/arg/TreeKnit/tree_a_resolved.nwk | 1 + test/arg/TreeKnit/tree_b_resolved.nwk | 1 + test/arg/TreeKnit/tree_c_resolved.nwk | 1 + test/arg/aln_a.fasta | 160 ++++++++++++++++++++++++++ test/arg/aln_b.fasta | 160 ++++++++++++++++++++++++++ test/arg/aln_c.fasta | 160 ++++++++++++++++++++++++++ test/arg/metadata.csv | 11 ++ test/test_arg.py | 55 +++++++++ treetime/arg.py | 16 +-- 10 files changed, 574 insertions(+), 8 deletions(-) create mode 100644 test/arg/TreeKnit/MCCs.json create mode 100644 test/arg/TreeKnit/tree_a_resolved.nwk create mode 100644 test/arg/TreeKnit/tree_b_resolved.nwk create mode 100644 test/arg/TreeKnit/tree_c_resolved.nwk create mode 100644 test/arg/aln_a.fasta create mode 100644 test/arg/aln_b.fasta create mode 100644 test/arg/aln_c.fasta create mode 100644 test/arg/metadata.csv create mode 100644 test/test_arg.py diff --git 
a/test/arg/TreeKnit/MCCs.json b/test/arg/TreeKnit/MCCs.json new file mode 100644 index 00000000..3113c3ce --- /dev/null +++ b/test/arg/TreeKnit/MCCs.json @@ -0,0 +1,17 @@ +{ + "MCC_dict" : { + "1": { + "trees":["tree_a", "tree_b"], + "mccs": [["3"], ["6"], ["8"], ["4", "7"], ["1", "2", "5", "9", "10"]] + + }, + "2": { + "trees":["tree_a", "tree_c"], + "mccs": [["3"], ["6"], ["8"], ["1", "2", "4", "5", "7", "9", "10"]] + }, + "3": { + "trees":["tree_b", "tree_c"], + "mccs": [["3"], ["4", "7"], ["1", "2", "5", "6", "8", "9", "10"]] + } + } +} \ No newline at end of file diff --git a/test/arg/TreeKnit/tree_a_resolved.nwk b/test/arg/TreeKnit/tree_a_resolved.nwk new file mode 100644 index 00000000..eff6a0ee --- /dev/null +++ b/test/arg/TreeKnit/tree_a_resolved.nwk @@ -0,0 +1 @@ +((3,1,10,(4,7)),(2,6),(5,8,9)); \ No newline at end of file diff --git a/test/arg/TreeKnit/tree_b_resolved.nwk b/test/arg/TreeKnit/tree_b_resolved.nwk new file mode 100644 index 00000000..eeb63d4f --- /dev/null +++ b/test/arg/TreeKnit/tree_b_resolved.nwk @@ -0,0 +1 @@ +(((1,10),3),(((4,7),(2,6)),((5,9),8))); \ No newline at end of file diff --git a/test/arg/TreeKnit/tree_c_resolved.nwk b/test/arg/TreeKnit/tree_c_resolved.nwk new file mode 100644 index 00000000..3c840a17 --- /dev/null +++ b/test/arg/TreeKnit/tree_c_resolved.nwk @@ -0,0 +1 @@ +((((1,10),(4,7)),3),(2,6),((5,9),8)); \ No newline at end of file diff --git a/test/arg/aln_a.fasta b/test/arg/aln_a.fasta new file mode 100644 index 00000000..308dbefa --- /dev/null +++ b/test/arg/aln_a.fasta @@ -0,0 +1,160 @@ +>1 +ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC +GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG +GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG 
+TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC +GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>2 +ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC +GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG +GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC +GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>10 +ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC +GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG 
+GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC +GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>4 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC +GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>5 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC +GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA 
+ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>6 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC +GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAGACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>7 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC +GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT 
+GAAGGACAGCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACGGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGCC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCTCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTGCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGCCG +>8 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC +GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTACTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGCCAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATCCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>9 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC 
+GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTACTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAACCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCTCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCACGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>3 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC +GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTAGTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTAAGGACGGCA +ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATCCGCTC +GACCCACCGGGCGCCGAATTTAACTCTCTAATGTTACTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG \ No newline at end of file diff --git a/test/arg/aln_b.fasta 
b/test/arg/aln_b.fasta new file mode 100644 index 00000000..308dbefa --- /dev/null +++ b/test/arg/aln_b.fasta @@ -0,0 +1,160 @@ +>1 +ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC +GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG +GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC +GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>2 +ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC +GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG +GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC +GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC 
+GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>10 +ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC +GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG +GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC +GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>4 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC +GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC 
+GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>5 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC +GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>6 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC +GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG 
+GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAGACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>7 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC +GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACAGCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACGGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGCC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCTCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTGCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGCCG +>8 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC +GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTACTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGCCAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA 
+GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATCCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>9 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC +GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTACTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAACCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCTCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCACGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>3 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC +GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTAGTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTAAGGACGGCA +ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG 
+GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATCCGCTC +GACCCACCGGGCGCCGAATTTAACTCTCTAATGTTACTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG \ No newline at end of file diff --git a/test/arg/aln_c.fasta b/test/arg/aln_c.fasta new file mode 100644 index 00000000..308dbefa --- /dev/null +++ b/test/arg/aln_c.fasta @@ -0,0 +1,160 @@ +>1 +ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC +GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG +GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC +GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>2 +ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC +GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT 
+GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG +GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC +GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>10 +ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC +GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG +GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC +GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>4 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC 
+GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>5 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC +GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>6 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG 
+CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC +GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAGACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>7 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC +GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT +GAAGGACAGCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACGGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGCC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCTCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTGCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGCCG +>8 
+ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC +GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTACTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGCCAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATCCGCTC +GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>9 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC +GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTACTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTCAGGACGGCA +ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAACCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATGCGCTC +GACCCACCTGGCTCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCACGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT 
+GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG +>3 +ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG +CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC +GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTAGTTTGCATGATTTT +GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTAAGGACGGCA +ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG +GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG +TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG +GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA +GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG +GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC +GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATCCGCTC +GACCCACCGGGCGCCGAATTTAACTCTCTAATGTTACTCACGTTATAACACGACGCCCGGTATACCTAAC +GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT +GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA +CTCCTATATTTTCTTTGTCG \ No newline at end of file diff --git a/test/arg/metadata.csv b/test/arg/metadata.csv new file mode 100644 index 00000000..eb2d74b9 --- /dev/null +++ b/test/arg/metadata.csv @@ -0,0 +1,11 @@ +name, date +1,2015.30 +2,2015.30 +3,2015.30 +4,2015.30 +5,2015.30 +6,2015.30 +7,2015.30 +8,2015.30 +9,2015.30 +10,2015.30 \ No newline at end of file diff --git a/test/test_arg.py b/test/test_arg.py new file mode 100644 index 00000000..825fd125 --- /dev/null +++ b/test/test_arg.py @@ -0,0 +1,55 @@ +# Test the arg functions on multiple trees +def test_reading_in_TK_output(): + import treetime.arg + tree_nwk_files = ['test/arg/TreeKnit/tree_a_resolved.nwk','test/arg/TreeKnit/tree_b_resolved.nwk', 'test/arg/TreeKnit/tree_c_resolved.nwk'] + tree_names = ["tree_a", "tree_b", "tree_c"] + assert treetime.arg.get_tree_names(tree_nwk_files) == tree_names + MCC_dict = 
treetime.arg.get_MCC_dict('test/arg/TreeKnit/MCCs.json') + + # read trees, assert trees are named consistently + tree_names = ["tree_a", "tree_b", "tree_c"] + trees_in_dict = set().union(*MCC_dict.keys()) + assert all([k in trees_in_dict for k in tree_names])== True + +test_reading_in_TK_output() +def test_assign_mccs(): + from Bio import Phylo + from treetime import TreeTime + from treetime import arg + from treetime.utils import parse_dates + + tree = Phylo.read('test/arg/TreeKnit/tree_a_resolved.nwk', 'newick') + tt = TreeTime(dates=parse_dates("test/arg/metadata.csv"), tree=tree, + aln="test/arg/aln_a.fasta", gtr='JC69', alphabet='nuc', verbose=True, + fill_overhangs=True, keep_node_order=True, + compress=False) + + # make a lookup for the MCCs and assign to tree + MCC_dict = arg.get_MCC_dict('test/arg/TreeKnit/MCCs.json') + MCC_locs = [frozenset(["tree_a", "tree_b"]), frozenset(["tree_a", "tree_c"])] + MCCs = [MCC_dict[loc] for loc in MCC_locs] + leaf_to_MCC = arg.get_mcc_map(MCCs) + assert leaf_to_MCC == {'3': [0, 0], '6': [1, 1], '8': [2, 2], '4': [3, 3], '7': [3, 3], '1': [4, 3], '2': [4, 3], '5': [4, 3], '9': [4, 3], '10': [4, 3]} + arg.assign_all_mccs(tt.tree, len(MCCs), leaf_to_MCC, tt.one_mutation) + assert all([tt.tree.root.mcc == c.mcc for c in tt.tree.root.clades]) + assert sorted([c.mcc for c in tt.tree.root.clades[0]]) == sorted([[0, 0], [3,3], [4, 3], [4,3]]) + assert sorted([c.mcc for c in tt.tree.root.clades[1]]) == sorted([[1, 1], [4, 3]]) + assert sorted([c.mcc for c in tt.tree.root.clades[2]]) == sorted([[2, 2], [4, 3], [4,3]]) + +test_assign_mccs() + +def test_parse_args(): + from treetime import arg + import numpy as np + + tree_nwk_files = ['test/arg/TreeKnit/tree_a_resolved.nwk','test/arg/TreeKnit/tree_b_resolved.nwk', 'test/arg/TreeKnit/tree_c_resolved.nwk'] + aln_files = ['test/arg/aln_a.fasta','test/arg/aln_b.fasta', 'test/arg/aln_c.fasta'] + MCC_file = 'test/arg/TreeKnit/MCCs.json' + + dict_ = arg.parse_arg(tree_nwk_files, aln_files, 
MCC_file, fill_overhangs=True) + assert sum(dict_["masks"][frozenset(["tree_a"])]) == sum(dict_["masks"][frozenset(["tree_b"])]) == sum(dict_["masks"][frozenset(["tree_c"])]) ==1000 + assert all(dict_["masks"][frozenset(["tree_a"])] == np.concatenate((np.ones(1000), np.zeros(2000)))) + assert all(dict_["masks"][frozenset(["tree_a", "tree_b"])] == np.concatenate((np.ones(2000), np.zeros(1000)))) + assert all(dict_["masks"][frozenset(["tree_a", "tree_b", "tree_c"])] == np.ones(3000)) + +test_parse_args() \ No newline at end of file diff --git a/treetime/arg.py b/treetime/arg.py index a33651dc..beebce10 100644 --- a/treetime/arg.py +++ b/treetime/arg.py @@ -15,7 +15,7 @@ def get_tree_names(tree_nwk_files): return tree_names def get_MCC_dict(MCC_file): - f = open("test.json") + f = open(MCC_file) data = json.load(f) MCC_dict = {} for key in data["MCC_dict"]: @@ -30,13 +30,13 @@ def get_mask(length_segments, tree_names): pos_list.append(new) mask = {} no_trees = len(tree_names) - for r in range(1,no_trees): - combos = itertools.combinations(range(no_trees), r) + for r in range(1,(no_trees+1)): + combos = itertools.combinations(range(1, (no_trees+1)), r) for comb in combos: new_mask = np.zeros(sum(length_segments)) for c in comb: new_mask[pos_list[c-1]:pos_list[c]] = 1 - mask[frozenset(tree_names[comb])] = new_mask + mask[frozenset([tree_names[c-1] for c in comb])] = new_mask return mask def parse_arg(tree_files, aln_files, MCC_file, fill_overhangs=True): @@ -79,10 +79,10 @@ def parse_arg(tree_files, aln_files, MCC_file, fill_overhangs=True): # construct concatenated alignment aln_combined = [] for leaf in all_leaves: - concat_seq = [] - for a in alignments: - concat_seq += a[leaf] - seq = [concat_seq] + concat_seq = alignments[1][leaf] + for a in range(1, len(alignments)): + concat_seq += alignments[a][leaf] + seq = concat_seq seq.id = leaf aln_combined.append(seq) From 3da70ea0742013537550e6f6c74cf6772cb6a7a5 Mon Sep 17 00:00:00 2001 From: anna-parker Date: Fri, 12 Aug 
2022 17:52:09 +0200 Subject: [PATCH 03/12] add tests and make sure multiTK runs on CLI --- test/arg/TreeKnit/MCCs.json | 37 +++--- test/arg/TreeKnit/aln_a.fasta | 160 ++++++++++++++++++++++++++ test/arg/TreeKnit/aln_b.fasta | 160 ++++++++++++++++++++++++++ test/arg/TreeKnit/aln_c.fasta | 160 ++++++++++++++++++++++++++ test/arg/TreeKnit/metadata.csv | 11 ++ test/arg/TreeKnit/tree_a_resolved.nwk | 2 +- test/arg/TreeKnit/tree_b_resolved.nwk | 2 +- test/arg/TreeKnit/tree_c_resolved.nwk | 2 +- test/arg/aln_a.fasta | 160 -------------------------- test/arg/aln_b.fasta | 160 -------------------------- test/arg/aln_c.fasta | 160 -------------------------- test/arg/metadata.csv | 11 -- test/command_line_tests.sh | 8 ++ test/test_arg.py | 59 +++++++--- treetime/arg.py | 38 ++++-- treetime/argument_parser.py | 17 ++- treetime/wrappers.py | 14 +-- 17 files changed, 617 insertions(+), 544 deletions(-) create mode 100644 test/arg/TreeKnit/aln_a.fasta create mode 100644 test/arg/TreeKnit/aln_b.fasta create mode 100644 test/arg/TreeKnit/aln_c.fasta create mode 100644 test/arg/TreeKnit/metadata.csv delete mode 100644 test/arg/aln_a.fasta delete mode 100644 test/arg/aln_b.fasta delete mode 100644 test/arg/aln_c.fasta delete mode 100644 test/arg/metadata.csv diff --git a/test/arg/TreeKnit/MCCs.json b/test/arg/TreeKnit/MCCs.json index 3113c3ce..c740446e 100644 --- a/test/arg/TreeKnit/MCCs.json +++ b/test/arg/TreeKnit/MCCs.json @@ -1,17 +1,22 @@ -{ - "MCC_dict" : { - "1": { - "trees":["tree_a", "tree_b"], - "mccs": [["3"], ["6"], ["8"], ["4", "7"], ["1", "2", "5", "9", "10"]] - - }, - "2": { - "trees":["tree_a", "tree_c"], - "mccs": [["3"], ["6"], ["8"], ["1", "2", "4", "5", "7", "9", "10"]] - }, - "3": { - "trees":["tree_b", "tree_c"], - "mccs": [["3"], ["4", "7"], ["1", "2", "5", "6", "8", "9", "10"]] - } - } +{ "MCC_dict" : { +"1": { + "trees":["tree_a", "tree_b"], +"mccs": [["3_0"], +["10_0", "4_0"], +["5_0", "8_0"], +["1_0", "2_0", "6_0", "7_0", "9_0"]] +}, +"2": { + 
"trees":["tree_a", "tree_c"], +"mccs": [["3_0"], +["10_0", "4_0"], +["5_0", "8_0"], +["1_0", "2_0", "6_0", "7_0", "9_0"]] +}, +"3": { + "trees":["tree_b", "tree_c"], +"mccs": [["1_0", "2_0", "6_0"], +["10_0", "3_0", "4_0", "5_0", "7_0", "8_0", "9_0"]] +} +} } \ No newline at end of file diff --git a/test/arg/TreeKnit/aln_a.fasta b/test/arg/TreeKnit/aln_a.fasta new file mode 100644 index 00000000..32e645a6 --- /dev/null +++ b/test/arg/TreeKnit/aln_a.fasta @@ -0,0 +1,160 @@ +>1_0 +AATTCGACAAGACAATCTCCATACGCTATTGGATGGATTCTCGAGGGTACGTGTATGCGTGACCAACCCA +CCCATAAACGCGTCGAGAATTATGACCGATAGTAGATGGCCCCGTACCGGATCGGTGGGGGCTTAACCGG +AGCTTACTCGCTGACCACTTCTGGCCGTCATCCAAGGTTAAAAACCACGCCACCCAGCACGAAACATATC +GTTAGCGCCCACTGTAGAGGTCGGTGCTCGAGAAGGGTATTTCTTCGCGTTTTCTATTCCCCAAGAATTC +GGCCTTGGAACTGGTTGTCCACTACTCGAGGTTCACCCCCTGACTGGATTTTTTGACCTGTTGTCCCGAG +GCCTCCGCCCATGTACGAGTGGCTTCCTAGATAGTGTCCTAACCCTCTGCACTAGTAGAGTGAGCCGCCC +CATCAATCAACCACAAATATAAAGAAGACTCCTTAATTCTGACACGTTCCGCTCCGCATCATAAGAAAAC +TTTGATCAATCTGTTCTCTTCTCGAGACCGCCAGTAATAAACGGACACCCCTTTATGGCCGTTGTTTGAT +AAAGTGATCTGCACTGCGAAAAATTTCGCCAACCGATCTCCGGTCCGGCCACTACACGTAGAAGCGTTTC +ATCACACGACATGAATGAGTAGCCCTACCGTGGGCATCTTCGTTTGTCCGCTCAGCCTGATCCGCGGGTC +AGGGCGGAACAGTATGGTTGGACTCCTTTCGGAGCGGGCATACCTAGTTTTCCTGGGTTAGAAACTTTCC +CCAATCCGTTGTTAGAGCCATCTTTAGAACAGTCCAGCGGTCGTGGCGCGGAAAAGTGTTGAGGGCTGCC +GCGGTGACGGATCCTCACTCTTCGCAGAGCTCCTGTTAGCCCAGTATGAACCTCATAAACCGTCTAAGAA +TCGGTTCCGCATACGACTGAGGGCTGCACAGTAAGCACATAATATAGCGTTGTCAATACAGATACGAATA +CCTGGTCTACGCCGCAAGAG +>6_0 +AATTCGACAAGACAATCTCCATACGCTATTGGATGGATTCTCGAGGGTACGTGTATGCGTGACCAACCCA +CCCATAAACGCGTCGAGAATTATGACCGATAGTAGATGGCCCCGTACCGGATCGGTGGGGGCTTAACCGG +AGCTTACTCGCTGACCACTTCTGGCCGTCATCCAAGGTTAAAAACCACGCCACCCAGCACGAAACATATC +GTTAGCGCCCACTGTAGAGGTCGGTGCTCGAGAAGGGTATTTCTTCGCGTTTTCTATTCCCCAAGAATTC +GGCCTTGGAACTGGTTGTCCACTACTCGAGGTTCACCCCCTGACTGGATTTTTTGACCTGTTGTCCCGAG +GCCTCCGCCCATGTACGAGTGGCTTCCTAGATAGTGTCCTAACCCTCTGCACTAGTAGAGTGAGCCGCCC 
+CATCAATCAACCACAAATATAAAGAAGACTCCTTAATTCTGACACGTTCCGCTCCGCATCATAAGAAAAC +TTTGATCAATCTGTTCTCTTCTCGAGACCGCCAGTAATAAACGGACACCCCTTTATGGCCGTTGTTTGAT +AAAGTGATCTGCACTGCGAAAAATTTCGCCAACCGATCTCCGGTCCGGCCACTACACGTAGAAGCGTTTC +ATCACACGACATGAATGAGTAGCCCTACCGTGGGCATCTTCGTTTGTCCGCTCAGCCTGATCCGCGGGTC +AGGGCGGAACAGTATGGTTGGACTCCTTTCGGAGCGGGCATACCTAGTTTTCCTGGGTTAGAAACTTTCC +CCAATCCGTTGTTAGAGCCATCTTTAGAACAGTCCAGCGGTCGTGGCGCGGAAAAGTGTTGAGGGCTGCC +GCGGTGACGGATCCTCACTCTTCGCAGAGCTCCTGTTAGCCCAGTATGAACCTCATAAACCGTCTAAGAA +TCGGTTCCGCATACGACTGAGGGCTGCACAGTAAGCACATAATATAGCGTTGTCAATACAGATACGAATA +CCTGGTCTACGCCGCAAGAG +>2_0 +AATTCGACAAGACAATCTCCATACGCTATTGGATGGATTCTCGAGGGTACGTGTATGCGTGACCAACCCA +CCCATAAACGCGTCGAGAATTATGACCGATAGTAGATGGCCCCGTACCGGATCGGTGGGGGCTTAACCGG +AGCTTACTCGCTGACCACTTCTGGCCGTCATCCAAGGTTAAAAACCACGCCACCCAGCACGAAACATATC +GTTAGCGCCCACTGTAGAGGTCGGTGCTCGAGAAGGGTATTTCTTCGCGTTTTCTATTCCCCAAGAATTC +GGCCTTGGAACTGGTTGTCCACTACTCGAGGTTCACCCCCTGACTGGATTTTTTGACCTGTTGTCCCGAG +GCCTCCGCCCATGTACGAGTGGCTTCCTAGATAGTGTCCTAACCCTCTGCACTAGTAGAGTGAGCCGCCC +CATCAATCAACCACAAATATAAAGAAGACTCCTTAATTCTGACACGTTCCGCTCCGCATCATAAGAAAAC +TTTGATCAATCTGTTCTCTTCTCGAGACCGCCAGTAATAAACGGACACCCCTTTATGGCCGTTGTTTGAT +AAAGTGATCTGCACTGCGAAAAATTTCGCCAACCGATCTCCGGTCCGGCCACTACACGTAGAAGCGTTTC +ATCACACGACATGAATGAGTAGCCCTACCGTGGGCATCTTCGTTTGTCCGCTCAGCCTGATCCGCGGGTC +AGGGCGGAACAGTATGGTTGGACTCCTTTCGGAGCGGGCATACCTAGTTTTCCTGGGTTAGAAACTTTCC +CCAATCCGTTGTTAGAGCCATCTTTAGAACAGTCCAGCGGTCGTGGCGCGGAAAAGTGTTGAGGGCTGCC +GCGGTGACGGATCCTCACTCTTCGCAGAGCTCCTGTTAGCCCAGTATGAACCTCATAAACCGTCTAAGAA +TCGGTTCCGCATACGACTGAGGGCTGCACAGTAAGCACATAATATAGCGTTGTCAATACAGATACGAATA +CCTGGTCTACGCCGCAAGAG +>8_0 +AATTCGACAAGACACTCTCCATACGCTATTGGATGGATTCTCGAGGGTACGTGTATGCGTGTCCAACCCA +CCCATAAACGCGTCGAGAATTATGACCGATAGTAGATGGCCCCGTACCGGATCGGTGGGGGCTTAACCGG +AGCTTACTCGCTGACCACTTCTGGCCGTCATCCAAGGTTAAAAACCACGCCACCCAGCACGAAACATATC +GTTAGCGCCCACTGTAGAGGTCGGTGCTCGAGAAGGGTATTTCTTCGCGTTTTCTATTCCCCAAGAATTC +GGCCTTGGAACTGGTTGTCCACTACTCGAGGTTCACCCCCTGACTGGATTTTTTGACCTGTTGTCCCGAG 
+GCCTCCGCCCATGTACGAGTGGCTTCCTAGATAGTGTCCTAACCCTCTGCACTAGTAGAGTGAGCCGCCC +CATCAATCAACCACAAATATAAAGAAGACTCCTTAATTCTGACACGTTCCGCTCCGCATCATAAGAAAAC +TTTGATCAATCTGTTCTCTTCTCGAGACCGCCATTAATAAACGGACACCCCTTTATGGCCGTTGTTTGAT +AAAGTGATCTTCACTGCGAAAAATTTCGCCAACCGATCTCCGGTCCGGCCACTACACGTAGAAGCGTTTC +ATCACACGACATGAATGAGTAGCCCTACCGTGGGCATCTTCGTTTGTCCGCTCAGCCTGATCCGCGGGTC +AGGGCGGAACAGTATGGTTGGACTCCTTTCGGTGCGGGCATACCTAGTTTTCCTGGGTTAGAAACTTTCC +CCAATCCGTTGTTAGAGCCATCTTTAGAACAGTCCAGCGGTCGTGGCGCGGAAAAGTGTTGAGGGCTGCC +GCGGTGACGGATCCTCACTCTTCGCAGAGCTCCGGTTAGCCCAGTATGAACCTCATAAACCGTCTAAGAA +TCGGTTCCGCATACGACTGAGGGCTGCACAGTAAGCACATAATATAGCGTTGTCAATACAGATACGAATA +CCTGGTCTACGCCGCAAGAG +>5_0 +AATTCGACAAGACACTCTCCATACGCTATTGGATGGATTCTCGAGGGTACGTGTATGCGTGTCCAACCCA +CCCATAAACGCGTCGAGAATTATGACCGATAGTAGATGGCCCCGTACCGGATCGGTGGGGGCTTAACCGG +AGCTTACTCGCTGACCACTTCTGGCCGTCATCCAAGGTTAAAAACCACGCCACCCAGCACGAAACATATC +GTTAGCGCCCACTGTAGAGGTCGGTGCTCGAGAAGGGTATTTCTTCGCGTTTTCTATTCCCCAAGAATTC +GGCCTTGGAACTGGTTGTCCACTACTCGAGGTTCACCCCCTGACTGGATTTTTTGACCTGTTGTCCCGAG +GCCTCCGCCCATGTACGAGTGGCTTCCTAGATAGTGTCCTAACCCTCTGCACTAGTAGAGTGAGCCGCCC +CATCAATCAACCACAAATATAAAGAAGACTCCTTAATTCTGACACGTTCCGCTCCGCATCATAAGAAAAC +TTTGATCAATCTGTTCTCTTCTCGAGACCGCCAGTAATAAACGGACACCCCTTTATGGCCGTTGTTTGAT +AAAGTGATCTTCACTGCGAAAAATTTCGCCAACCGATCTCCGGTCCGGCCACTACACGTAGAAGCGTTTC +ATCACACGACATGAATGAGTAGCCCTACCGTGGGCATCTTCGTTTGTCCGCTCAGCCTGATCCGCGGGTC +AGGGCGGAACAGTATGGTTGGACTCCTTTCGGTGCGGGCATACCTAGTTTTCCTGGGTTAGAAACTTTCC +CCAATCCGTTGTTAGAGCCATCTTTAGAACAGTCCAGCGGTCGTGGCGCGGAAAAGTGTTGAGGGCTGCC +GCGGTGACGGATCCTCACTCTTCGCAGAGCTCCGGTTAGCCCAGTATGAACCTCATAAACCGTCTAAGAA +TCGGTTCCGCATACGACTGAGGGCTGCACAGTAAGCACATAATATAGCGTTGTCAATACAGATACGAATA +CCTGGTCTACGCCGCAAGAG +>7_0 +AATTCGACAAGACACTCTCCATACGCTATTGGATGGATTCTCGAGGGTACGTGTATGCGTGTCCAACCCA +CCCATAAACGCGTCGAGAATTATGACCGATAGTAGATGGCCCCGTACCGGATCGGTGGGGGCTTAACCGG +AGCTTACTCGCTGACCACTTCTGGCCGTCATCCAAGGTTAAAAACCACGCCACCCAGCACGAAACATATC +GTTAGCGCCCACTGTAGAGGTAGGTGCTCGAGAAGGGTATTTCTTCGCGTTTTCTATTCCCCAAGAATTC 
+GGCCTTGGAACTGGTTGTCCACTACTCGAGGTTCACCCCCTGACTGGATTTTTTGACCTGTTGTCCCGAG +GCCTCCGCCCATGTACGAGTGGCTTCCTAGATAGTGTCCTAACCCTCTGCACTAGTAGAGTGAGCCGCCC +CATCAATCAACCACAAATATAAAGAAGACTCCTTAATTCTGACACGTTCCGCTCCGCATCATAAGAAAAC +TTTGATCAATCTGTTCTCTTCTCGAGACCGCCAGTAATAAACGGACACCCCTTTATGGCCGTTGTTTGAT +AAAGTGATCTGCACTGCGAAAAATTTCGCCAACCGATCTCCGGTCCGGCCACTACACGTAGAAGCGTTTC +ATCACACGACATGAATGAGTAGCCCTACCGTGGGCATCTTTGTTTGTCCGCTCAGCCTGATCCGCGGGTC +AGGGCGGAACAGTATGGTTGGACTCCTTTCGGTGCGGGCATACCTAGTTTTCCTGGGTTAGAAACTTTCC +CCAATCCGTTGTTAGAGCCATCTTTAGAACAGTCCAGCGGTCGTGGCGCGGAAAAGTGTTGAGGGCTGCG +GCGGTGACGGATCCTCACTCTTCGCAGAGCTCCGGTTAGCCCAGTATGAACCTCATAAACCGTCTAAGAA +TCGGTTCCGCATACGACTGAGGGCTGCGCAGTAAGCACATAATATAGCGTTGTCAATACAGATACGAATA +CCTGGTCTACGCCGCAAGAG +>4_0 +AATTCGACAAGACACTCTCCATACGCTATTGGATGGATTCTCGAGGGTACGTGTATGCGTGTCCAACCCT +CCCATAAACGCGTCGAGAATTATGACCGATAGTAGATGGCCCCGTACCGGATCGGTGGGGGCTTAACCGG +AGCTTACTCGCTGACCACTTCTGGCCGTCATCCAAGGTTAAAAACCACGCCACCCAGCACGAAACATATC +GTTAGCGCCCACTGTAGAGGTAGGTGCTCGAGAAGGGTATTTCTTCGCGTTTTCTATTCCCCAAGAATTC +GGCCTTGGAACTGGTTGTCCACTACTCGAGGTTCACCCCCTGACTGGATTTTTTGACCTGTTGTCCCGAG +GCCTCCGCCCATGTACGAGTGGCTTCCTAGATAGTGTCCTAACCCTCTGCACTAGTAGAGTGAGCCGCCC +CATCAATCAACCACAAATATAAAGAAGACTCCTTAATTCTGACACGTTCCGCTCCGCATCATAAGAAAAC +TTTGATCAATCTGTTCTCTTCTCGAGACCGCCAGTAATAAACGGACACCCCTTTATGGCCGTTGTTTGAT +AAAGTGATCTGCACTGCGAAAAATTTCGCCAACCGATCTCCGGTCCGGCCACTACACGTAGAAGCGTTTC +ATCACACGACATGAATGAGTAGCCCTACCGTGGGCATCTTTGTTTGTCCGCTCAGCCTGATCCGCGGGTC +AGGGCGGAACAGTATGGTTGGACTCCTTTCGGTGCGGGCATACCTAGTTTTCCTGGGTTAGAAACTTTCC +CCAATCCGTTGTTAGAGCCATCTTTAGAACAGTCCAGCGGTCGTGGCGCGGAAAAGTGTTGAGGGCTGCG +GCGGTGACGGATCCTCACTCTTCGCAGAGCTCCGGTTAGCCCAGTATGAACCTCATAAACCGTCTAAGAA +TCGGTTCCGCATACGACTGAGGGCTGCGCAGTAAGCACATAATATAGCGTTGTCAATACAGATACGAATA +CCTGGTCTACGCCGCAAGAG +>10_0 +AATTCGACAAGACACTCTCCATACGCTATTGGATGGATTCTCGAGGGTACGTGTATGCGTGTCCAACCCT +CCCATAAACGCGTCGAGAATTATGACCGATAGTAGATGGCCCCGTACCGGATCGGTGGGGGCTTAACCGG 
+AGCTTACTCGCTGACCACTTCTGGCCGTCATCCAAGGTTAAAAACCACGCCACCCAGCACGAAACATATC +GTTAGCGCCCACTGTAGAGGTAGGTGCTCGAGAAGGGTATTTCTTCGCGTTTTCTATTCCCCAAGAATTC +GGCCTTGGAACTGGTTGTCCACTACTCGAGGTTCACCCCCTGACTGGATTTTTTGACCTGTTGTCCCGAG +GCCTCCGCCCATGTACGAGTGGCTTCCTAGATAGTGTCCTAACCCTCTGCACTAGTAGAGTGAGCCGCCC +CATCAATCAACCACAAATATAAAGAAGACTCCTTAATTCTGACACGTTCCGCTCCGCATCATAAGAAAAC +TTTGATCAATCTGTTCTCTTCTCGAGACCGCCAGTAATAAACGGACACCCCTTTATGGCCGTTGTTTGAT +AAAGTGATCTGCACTGCGAAAAATTTCGCCAACCGATCTCCGGTCCGGCCACTACACGTAGAAGCGTTTC +ATCACACGACATGAATGAGTAGCCCTACCGTGGGCATCTTTGTTTGTCCGCTCAGCCTGATCCGCGGGTC +AGGGCGGAACAGTATGGTTGGACTCCTTTCGGTGCGGGCATACCTAGTTTTCCTGGGTTAGAAACTTTCC +CCAATCCGTTGTTAGAGCCATCTTTAGAACAGTCCAGCGGTCGTGGCGCGGAAAAGTGTTGAGGGCTGCG +GCGGTGACGGATCCTCACTCTTCGCAGAGCTCCGGTTAGCCCAGTATGAACCTCATAAACCGTCTAAGAA +TCGGTTCCGCATACGACTGAGGGCTGCGCAGTAAGCACATAATATAGCGTTGTCAATACAGATACGAATA +CCTGGTCTACGCCGCAAGAG +>3_0 +AATTCGACAAGACACTCTCCATACGCTATTGGATGGATTCTCGAGGGTACGTGTATGCGTGTCCAACCCA +CCCATAAACGCGTCGAGAATTATGACCGATAGTAGATGGCCCCGTACCGGATCGGTGGGGGCTTAACCGG +AGCTTACTCGCTGACCACTTCTGGCCGTCATCCAAGGTTAAAAACCACGCCACCCAGCACGAAACATATC +GTTAGCGCCCACTGTAGAGGTAGGTGCTCGAGAAGGGTATTTCTTCGCGTTTTCTATTCCCCAAGAATTC +GGCCTTGGAACTGGTTGTCCACTACTCGAGGTTCACCCCCTGACTGGATTTTTTGACCTGTTGTCCCGAG +GCCTCCGCCCATGTACGAGTGGCTTCCTAGATAGTGTCCTAACCCTCTGCACTAGTAGAGTGAGCCGCCC +CATCAATCAACCACAAATATAAAGAAGACTCCTTAATTCTGACACGTTCCGCTCCGCATCATAAGAAAAC +TTTGATCAATCTGTTCTCTTCTCGAGACCGCCAGTAATAAACGGACACCCCTTTATGGCCGTTGTTTGAC +AAAGTGATCTGCACTGCGAAAAATTTCGCCAACCGATCTCCGGTCCGGCCACTACACGTAGAAGCGTTTC +ATCACACGACATGAATGAGTAGCCCTACCGTGGGCATCTTTGTTTGTCCGCTCAGCCTGATCCGCGGGTC +AGGGCGGAACAGTATGGTTGGACTCCTTTCGGTGCGGGCATACCTAGTTTTCCTGGGTTAGAAACTTTCC +CCAATCCGTTGTTAGAGCCATCTTTAGAACAGTCCAGCGGTCGTGGCGCGGAAAAGTGTTGAGGGCTGCG +GCGGTGACGGATCCTCACTCTTCGCAGAGCTCCGGTTAGCCCAGTATGAACCTCATAAACCGTCTAAGAA +TCGGTTCCGCATACGACTGAGGGCTGCGCAGTAAGCACATAATATAGCGTTGTCAATACAGATACGAATA +CCTGGTCTACGCCGCAAGAG +>9_0 +AATTCGACAAGACACTCTCCATACGCTATTGGATGGATTCTCGAGGGTACGTGTATGCGTGTCCAACCCA 
+CCCATAAACGCGTCGAGAATTATGACCGATAGTAGATGGCCCCGTACCGGATCGGTGGGGGCTTAACCGG +AGCTTACTCGCTGACCACTTCTGGCCGTCATCCAAGGTTAAAAACCACGCCACCCAGCACGAAACATATC +GTTAGCGCCCACTGTAGAGGTAGGTGCTCGAGAAGGGTATTTCTTCGCGTTTTCTATTCCCCAAGAATTC +GGCCTTGGAACTGGTTGTCCACTACTCGAGGTTCACCCCCTGACTGGATTTTTTGACCTGTTGTCCCGAG +GCCTCCGCCCATGTACGAGTGGCTTCCTAGATAGTGTCCTAACCCTCTGCACTAGTAGAGTGAGCCGCCC +CATCAATCAACCACAAATATAAAGAAGACTCCTTAATTCTGACACGTTCCGCTCCGCATCATAAGAAAAC +TTTGATCAATCTGTTCTCGTCTCGAGACCGCCAGTAATAAACGGACACCCCTTTATGGCCGTTGTTTGAT +CAAGTGATCTGCACTACGAAAAATTTCGCCAACCGATCTCCGGTCCGGCCACTACACGTAGAAGCGTTTC +ATCACACGACATGAATGAGTAGCCCTACCGTGGGCATCTTTGTTTGTCCGCTCAGCCTGATCCGCGGGTC +AGGGCGGAACAGTATTGTTGGACTCCTTTCGGTGCGGGCATACCTAGTTTTCCTGGGTTAGAAACTTTCC +CCAATCCGTTGTTAGAGCCATCTTTAGAACAGTCCAGCGGTCGTGGCGCGGAAAAGTGTTGAGGGCTGCC +GCGGTGACGGATCCTCACTCTTCGCAGAGCTCCGGTTAGCCCAGTATGAACCTCATAAACCGTCTAAGAA +TCGGTTCCGCATACGACTGAGGGCTGCGCAGTAAGCACATAATATAGCGTTGTCAATACAGATACGAATA +CCTGGTCTACGCCGCAAGAG diff --git a/test/arg/TreeKnit/aln_b.fasta b/test/arg/TreeKnit/aln_b.fasta new file mode 100644 index 00000000..2e74a1a1 --- /dev/null +++ b/test/arg/TreeKnit/aln_b.fasta @@ -0,0 +1,160 @@ +>9_0 +ACATTTTGCGTGGGCTAAGAGGTGACAATCCTCTGCAAAGTCGAGGCGGGTTTACACAGCGAGCCTACGT +GACACATCGTGCGTTGGACGCCTTAGTTATAGAGCCACTGCATGCGCACTGAATCCCGTTATGGTAAGGG +TCAGACCCTGCTATACCTAACCAACGCCGACGCAGAGGGTGTGGCCGTGAAATAACTATATAGTTACTTA +GGGTCGATCTCACCATACCAAACGTGGGCACACTACGTGCAAATGAGCGTCCTTTGTAGGACTAAAATAC +TTCATTTTATTGACACAAACGGGGCCCTTGACATAGGCACTCAAGAATCGGTCCCCACGCGCAGTCGCCT +ACTCATACACTAAGGACGCGCTATATAACGTAAATGTGAGGATCGGCTTATGCCGCGCTAGAATGAGTAG +TGGCGTCGTGGATGCGAATGCTTCGAAGCTGCTGGGATAGGGCGGGCGATATAACGTGTTGCTACCGGTT +GGTCGGTGATTGGGGGTGTGGCCTTGTGGAATAGTCAACCGCAAGCGCCATCGCCAAGAGGTAGCTGTTG +ACAGGAATCATAGACATCCTTCATGGGATGTTTCAAATTAGCCCTGTGCATAACACTACGACTCGAATGT +GGCTTAACATTGGTGAACCAGAACCGTCGGACTCATCAGTTGCTCCTCACGGTCTGTCGCCACCACACGC +AGCACTTGAAATAAATCGTTAACTTTAGGGGGCTGGATGTGCCCTGCATGGGGCAGTGCGGGGCTGAGCG 
+TCCTGAAAGCAAACAATCGGCAGTCTCCTCATGGTCCACCGCCAGACCTTGCCAGAACAACAACGCCTTC +CCTTACCTATCGAGAGGGTAGTAGACATGGCCAGGAGCGCAGCCAGTGGTCGGGCGCCGGTCATGCGCTT +CTTTATTCCGAACTTGGACCGCGATTATTTCGCTCTTCCTGAATAGCGATTTATTTGATCGCCAGTAATC +ACATCATTGGGGAATGTTTC +>4_0 +ACATTTTGCGTGGACTAAGAGGTGACAATCCTCTGCAAAGTCGAGGCGGGTTTACACAGCGAGCCTACGT +GACACATCGTGCGTTGGACGCCTTAGTTATAGAGCCACTGCATGCGCACTGAATCCCGTTATGGTAAGGG +TCAGACCCTGCTATACCTAACCAACGCCGACGCAGAGGGTGTGGCCGTGAAATAACTATATAGTTACTTA +GGGTCGATCTCACCATACCAAACGTGGGCACACTACGTGCAAATGAGCGTCCTTTGTAGGACTAAAATAC +TTCATTTTATTGACACAAACGGGGCCCTTGACATAGGCACTCAAGAATCGGTCCCCACGCGCAGTCGCCT +ACTCATACACTAAGGACGCGCTATATAACGTAAATGTGAGGATCGGCTTATGCCGCGCTAGAATGAGTAG +TGGCGTCGTGGATGCGAATGCTTCGAAGCTGCTGGGATAGGGCGGGCGATATAACGTGTTGCTACCGGTT +GGTCGGTGATTGGGGGTGTGGCCTTGTGGAATAGTCAACCGCAAGCGCCATCGCCAAGAGGTAGCTGTTG +ACAGGAATCATAGACATCCTTCATGGGATGTTTCAAATTAGCCCTGTGCATAACACTACGACTCGAATGT +GGCTTAACATTGGTGAACCAGAACCGTCGGACTCATCAGTTGCTCCTCACGGTCTGTCGGCACCACACGC +AGCACTTGAAATAAATCGTTAACTTTAGGGGGCTGGATGTGCCCTGCATGGGGCAGTGCGGGGCTGAGCG +TCCTGAAAGCAAACAATCGGCAGTCTCCTCATGGTCCACCGCCAGACCTTGCCAGAACAACAACGCCTTC +CCTTACCTATCGAGAGGGTAGTAGACATGGCCAGGAGCGCAGCCAGTGGTCGGGCGCCGGTCATGCGCTT +CTTTATTCCGAACTTGGACCGCGATTATTTCGCTCTTCCTGAATAGCGATTTATTTGATCGCCAGTAATC +ACATCATTGGGGAATGTTTC +>10_0 +ACATTTTGCGTGGACTAAGAGGTGACAATCCTCTGCAAAGTCGAGGCGGGTTTACACAGCGAGCCTACGT +GACACATCGTGCGTTGGACGCCTTAGTTATAGAGCCACTGCATGCGCACTGAATCCCGTTATGGTAAGGG +TCAGACCCTGCTATACCTAACCAACGCCGACGCAGAGGGTGTGGCCGTGAAATAACTATATAGTTACTTA +GGGTCGATCTCACCATACCAAACGTGGGCACACTACGTGCAAATGAGCGTCCTTTGTAGGACTAAAATAC +TTCATTTTATTGACACAAACGGGGCCCTTGACATAGGCACTCAAGAATCGGTCCCCACGCGCAGTCGCCT +ACTCATACACTAAGGACGCGCTATATAACGTAAATGTGAGGATCGGCTTATGCCGCGCTAGAATGAGTAG +TGGCGTCGTGGATGCGAATGCTTCGAAGCTGCTGGGATAGGGCGGGCGATATAACGTGTTGCTACCGGTT +GGTCGGTGATTGGGGGTGTGGCCTTGTGGAATAGTCAACCGCAAGCGCCATCGCCAAGAGGTAGCTGTTG +ACAGGAATCATAGACATCCTTCATGGGATGTTTCAAATTAGCCCTGTGCATAACACTACGACTCGAATGT 
+GGCTTAACATTGGTGAACCAGAACCGTCGGACTCATCAGTTGCTCCTCACGGTCTGTCGGCACCACACGC +AGCACTTGAAATAAATCGTTAACTTTAGGGGGCTGGATGTGCCCTGCATGGGGCAGTGCGGGGCTGAGCG +TCCTGAAAGCAAACAATCGGCAGTCTCCTCATGGTCCACCGCCAGACCTTGCCAGAACAACAACGCCTTC +CCTTACCTATCGAGAGGGTAGTAGACATGGCCAGGAGCGCAGCCAGTGGTCGGGCGCCGGTCATGCGCTT +CTTTATTCCGAACTTGGACCGCGATTATTTCGCTCTTCCTGAATAGCGATTTATTTGATCGCCAGTAATC +ACATCATTGGGGAATGTTTC +>7_0 +ACATTTTGCGTGGACTAAGAGGTGACAATCCTCTGCAAAGTCGAGGCGGGTTTACACAGCGAGCCTACGT +GACACATCGTGCGTTGGACGCCTTAGTTATAGAGCCACTGCATGCGCACTGAATCCCGTTATGGTAAGGG +TCAGACCCTGCTATACCTAACCAACGCCGACGCAGAGGGTGTGGCCGTGAAATAACTATATAGTTACTTA +GGGTCGATCTCACCATACCAACCGTGGGCACACTACGTGCAAATGAGCGTCCTTTGTAGGACTAAAATAC +TTCATTTTATTGACACAAACGGGGCCCTTGACATAGGCACTCAAGAATCGGTCCCCACACGCAGTCGCCT +ACTCATACACTAAGGACGCGCTATATAACGTAAATGTGAGGATCGGCTTATGCCGCGCTAGAATGAGTAG +TGGCGTCGTGGATGCGAATGCTTCGAAGCTGCTGGGATAGGGCGGGCGATATAACGTGTTGCTACCGGTT +GGTCGGTGATTGGGGGTGTGGCCTTGTGGAATAGTCAACCGCAAGCGCCATCGCCAAGAGGTAGCTGTTG +ACAGGAATCATAGCCATCCTTCATGGGATGTTTCAAATTAGCCCTGTGCATAACACTACGACTCGAATGT +GGCTTAACATTGGTGAACCAGAACCGTCGGACTCATCAGTTGCTCCTCACGGTCTGTCGCCACCACACGC +AGCACTTGAAATAAATCGTTAACTTTAGGGGGCTGGATGTGCCCTGCATGGGGCAGTGCGGGGCTGAGCG +TCCTGAAAGCAAACAATCGGCAGTCTCCTCATGGTCCACCGCCAGACCTTGCCAGAACAACAACGCCTTC +CCTTACCTATCGAGAGGGTAGTAGACATGGCCAGGAGCGCAGCCAGTGGTCGGGCGCCGGTCATGCGCTT +CTTTATTCCGAACTTGGACCGCGATTATTTCGCTCTTCCTGAATAGCGATTTATTTGATCGCCAGTAATC +ACATCATTGGGGAATGTTTC +>8_0 +ACATTTTGCGTGGACTAAGAGGTGACAATCCTCTGCAAAGTCGAGGCGGGTTTACACAGCGAGCCTACGT +GACACATCGTGCGTTGGACGCCTTAGTTATAGAGCCACTGCATGCGCACTGAATCCCGTTATGGTAAGGG +TCAGACCCTGCTATACCTAACCAACGCCGACGCAGAGGGTGTGGCCGTGAAATAACTATATAGTTACTTA +GGGTCGATCTCACCATACCAAACGTGGGCACACTACGTGCAAATGAGCGTCCTTTGTAGGACTAAAATAC +TTCATTTTATTGACACAAACGGGGCCCTTGACATAGGCACTCAAGAATCGGTCCCCACGCGCAGTCGCCT +ACTCATACACTAAGGACGCGCTATATAACGTAAATGTGAGGATCGGCTTATGCCGCGTTAGAATGAGTGG +TGGCGTCGTGGATGCGAATGCTTCGAAGCTGCTGGGATAGGGCGGGCGATATAACGTGTTGCTACCGGTT +GGTCGGTGATTGGGGGTGTGGCCTTGTGGAATAGTCAACCGCAAGCGCCATCGCCAAGAGGTAGCTGTTG 
+ACAGGAATCATAGACATCCTTCATGGGATGTTTCAAATTAGCCCTGTGCATAGCACTACGACTCGAATGT +GGCTTAACATTGGTGAACCAGAACCGTCGGACTCATCAGTTGCTCCTCACGGTCTGTCGCCACCACACGC +AGCACTTGAAATAAATCGTTAACTTTAGGGGGCTGGATGTGCCCTGCATGGGGCAGTGCGGGGCTGAGCG +TCCTGAAAGCAAACAATCGGCAGTCTCCTCATGGTCCACCGCCAGACCTTGCCAGAACAACAACGCCTTC +CCTTACCTATCGAGAGGGTAGTAGACATGGCCAGGAGCGCAGCCAGTGGTCGGGCGCCGGTCATGCGCTT +CTTTATTCCGAACTTGGACCGCGATTATTTCGCTCTTCCTGAATAGCGATTTATTTGATCGCCAGTAATC +ACATCATTGGGGAATGTTTC +>5_0 +ACATTTTGCGTGGACTAAGAGGTGACAATCCTCTGCAAAGTCGAGGCGGGTTTACACAGCGAGCCTACGT +GACACATCGTGCGTTGGACGCCTTAGTTATAGAGCCACTGCATGCGCACTGAATCCCGTTATGGTAAGGG +TCAGACCCTGCTATACCTAACCAACGCCGACGCAGAGGGTGTGGCCGTGAAATAACTATATAGTTACTTA +GGGTCGATCTCACCATACCAAACGTGGGCACACTACGTGCAAATGAGCGTCCTTTGTAGGACTAAAATAC +TTCATTTTATTGACACAAACGGGGCCCTTGACATAGGCACTCAAGAATCGGTCCCCACGCGCAGTCGCCT +ACTCATACACTAAGGACGCGCTATATAACGTAAATGTGAGGATCGGCTTATGCCGAGTTAGAATGAGTGG +TGGCGTCGTGGATGCGAATGCTTCGAAGCTGCTGGGATAGGGCGGGCGATATAACGTGTTGCTACCGGTT +GGTCGGTGATTGGGGGTGTGGCCTTGTGGAATAGTCAACCGCAAGCGCCATCGCCAAGAGGTAGCTGTTG +ACAGGAATCATAGACATCCTTCATGGGATGTTTCAAATTAGCCCTGTGCATAACACTACGACTCGAATGT +GGCTTAACATTGGTGAACCAGAACCGTCGGACTCATCAGTTGCTCCTCACGGTCTGTCGCCACCACACGC +AGCACTTGAAATAAATCGTTAACTTTAGGGGGCTGGATGTGCCCTGCATGGGGCAGTGCAGGGCTGAGCG +TCCTGAAAGCAAACAATCGGCAGTCTCCTCATGGTCCACCGCCAGACCTTGCCAGAACAACAACGCCTTC +CCTTACCTATCGAGAGGGTAGTAGACATGGCCAGGCGCGCAGCCAGTGGTCGGGCGCCGGTCATGCGCTT +CTTTATTCCGAACTTGGACCGCGATTATTTCGCTCTTCCTGAATAGCGATTTATTTGATCGCCAGTAATC +ACATCATTGGGGAATGTTTC +>3_0 +ACATTTTGCGTGGACTAAGAGGTGACAATCCTCTGCAAAGTCGAGGCGGGTTTACACAGCGAGCCTACGT +GACACATCGTGCGTTGGACGCCTTAGTTATAGAGCCACTGCATGCGCACTGAATCCCGTTATGGTAAGGG +TCAGACCCTGCTATACCTAACCAACGCCGACGCAGAGGGTGTGGCCGTGAAATAACTATATAGTTACTTA +GGGTCGATCTCACCATACCAAACGTGGGCACACTACGTGCAAATGAGCGTCCTTTGTAGGACTAAAATAC +TTCATTTTATTGACACAAACGGGGCCCTTGACATAGGCACTCAAGAATCGGTCCCCACGCGCAGTCGCCT +ACTCATACACTAAGGACGCGCTATATAACGTAAATGTGAGGATCGGCTTATGCCGCGTTAGAATGAGTGG +TGGCGTCGTGGATGCGAATGCTTCGAAGCTGCTGGGATAGGGCGGGCGATATAACGTGTTGCTACCGGTT 
+GGTCGGTGATTGGGGGTGTGGCCTTGTGGAATAGTCAACCGCAAGCGCCATCGCCAACAGGTAGCTGTTG +ACAGGAATCATAGACATCCTTCATGGGATGTTTCAAATTAGCCCTGTGCATAACACTACGACTCGAATGT +GGCTTAACATTGGTGAACCAGAACCGTCGGACTCATCAGTTGCTCCTCACGGTCTGTCGCCACCACACGC +AGCACTTGAAATAAATCGTTAACTTTAGGGGGCTGGATGTGCCCTGCATGGGGCAGTGCGGGGCTGAGCG +TCCTGAAAGCAAACAATCGGCAGTCTCCTCATGGTCCACCGCCAGACCTTGCCAGAACAACAACGCCTTC +CCTTACCTATCGAGAGGGTAGTAGACATGGCCAGGAGCGCAGCCAGTGGTCGGGCGCCGGTCATGCGCTT +CTTTATTCCGAACTTGGACCGCGATTATTTCGCTCTTCCTGAATAGCGATTTATTTGATCGCCAGTAATC +ACATCATTGGGGAATGTTTC +>1_0 +ACATTTTGCGTGGACTAAGAGGTGACAATCCTCTGCAAAGTCGAGGCGGGTTTACACAGCGAGCCTACGT +GACACATCGTGCGTTGGACGCCTTAGTTATAGAGCCACTGCATGCGCACTGAATCCCGTTATGGTAAGGG +TCAGACCCTGCTATACCTAACCAACGCCGACACAGAGGGTGTGGCCGTGAAATAACTATATAGTTACTTA +GGGTCGATCTCACCATACCAAACGTGGGCACACTACGTGCAAATGAGCGTCCTTTGTAGGACTAAAATAC +TTCATTTTATTGACACAAACGGGGCCCTTGACATAGGCACTCAAGAATCGGTCCCCACGCGCAGTCGCCT +ACTCATACACTAAGGACGCGCTATATAACGTAAATGTGAGGATCGGCTTATGCCGCGTTAGAATGAGTGG +TGGCGTCCTGGATGCGAATGCTTCGAAGCTGCTGGGATAGGGCGGGCGATATAACGTGTTGCTACCGGTT +GGTCGGTGATTGGGGGTGTGGCCTTGTGGAATAGTCAACCGCAAGCGCCATCGCCAAGAGGTAGCTGTTG +ACAGGAATCATAGACATCCTTCATGGGATGTTTCAAATTAGCCCTGTGCATAACACTACGACTCGAATGT +GGCTTAACATTGGTGAACCAGAACCGTCGGACTCATCAGTTGCTCCTCACGGTCTGTCGCCACCACACGC +AGCACTTGAAATAAATCGTTAACTTTAGGGGGCTGGATGTGCCCTGCATGGGGCAGTGCGGGGCTGAGCG +TCCTGAAAGCAAACAATCGGCAGTCTCCTCATGGTCCACCGCCAGACCTTGCCAGAACAACAAGGCCTTC +CCTTACCTATCGAGAGGGTAGTAGACATGGCCAGGAGCGCAGCCAGTGGTCGGGCGCCGGTCATGCGCTT +CTTTATTCCGAACTTGGACCGCGATTATTTCGCTCTTCCTGAATAGCGATTTATTTGATCGCCAGTAATC +ACATCATTGGGGAATGTTTC +>6_0 +ACATTTTGCGTGGACTAAGAGGTGACAATCCTCTGCAAAGTCGAGGCGGGTTTACACAGCGAGCCTACGT +GACACATCGTGCGTTGGACGCCTTAGTTATAGAGCCACTGCATGCGCACTGAATCCCGTTATGGTAAGGG +TCAGACCCTGCTATACCTAACCAACGCCGACACAGAGGGTGTGGCCGTGAAATAACTATATAGTTACTTA +GGGTCGATCTCACCATACCAAACGTGGGCACACTACGTGCAAATGAGCGTCCTTTGTAGGACTAAAATAC +TTCATTTTATTGACACAAACGGGGCCCTTGACATAGGCACTCAAGAATCGGTCCCCACGCGCAGTCGCCT +ACTCATACACTAAGGACGCGCTATATAACGTAAATGTGAGGATCGGCTTATGCCGCGTTAGAATGAGTGG 
+TGGCGTCCTGGATGCGAATGCTTCGAAGCTGCTGGGATAGGGCGGGCGATATAACGTGTTGCTACCGGTT +GGTCGGTGATTGGGGGTGTGGCCTTGTGGAATAGTCAACCGCAAGCGCCATCGCCAAGAGGTAGCTGTTG +ACAGGAATCATAGACATCCTTCATGGGATGTTTCAAATTAGCCCTGTGCATAACACTACGACTCGAATGT +GGCTTAACATTGGTGAACCAGAACCGTCGGACTCATCAGTTGCTCCTCACGGTCTGTCGCCACCACACGC +AGCACTTGAAATAAATCGTTAACTTTAGGGGGCTGGATGTGCCCTGCATGGGGCAGTGCGGGGCTGAGCG +TCCTGAAAGCAAACAATCGGCAGTCTCCTCATGGTCCACCGCCAGACCTTGCCAGAACAACAAGGCCTTC +CCTTACCTATCGAGAGGGTAGTAGACATGGCCAGGAGCGCAGCCAGTGGTCGGGCGCCGGTCATGCGCTT +CTTTATTCCGAACTTGGACCGCGATTATTTCGCTCTTCCTGAATAGCGATTTATTTGATCGCCAGTAATC +ACATCATTGGGGAATGTTTC +>2_0 +ACATTTTGCGTGGACTAAGAGGTGACAATCCTCTGCAAAGTCGAGGCGGGTTTACACAGCGAGCCTACGT +GACACATCGTGCGTTGGACGCCTTAGTTATAGAGCCACTGCATGCGCACTGAATCCCGTTATGGTAAGGG +TCAGACCCTGCTATACCTAACCAACGCCGACACAGAGGGTGTGGCCGTGAAATAACTATATAGTTACTTA +GGGTCGATCTCACCATACCAAACGTGGGCACACTACGTGCAAATGAGCGTCCTTTGTAGGACTAAAATAC +TTCATTTTATTGACACAAACGGGGCCCTTGACATAGGCACTCAAGAATCGGTCCCCACGCGCAGTCGCCT +ACTCATACACTAAGGACGCGCTATATAACGTAAATGTGAGGATCGGCTTATGCCGCGTTAGAATGAGTGG +TGGCGTCCTGGATGCGAATGCTTCGAAGCTGCTGGGATAGGGCGGGCGATATAACGTGTTGCTACCGGTT +GGTCGGTGATTGGGGGTGTGGCCTTGTGGAATAGTCAACCGCAAGCGCCATCGCCAAGAGGTAGCTGTTG +ACAGGAATCATAGACATCCTTCATGGGATGTTTCAAATTAGCCCTGTGCATAACACTACGACTCGAATGT +GGCTTAACATTGGTGAACCAGAACCGTCGGACTCATCAGTTGCTCCTCACGGTCTGTCGCCACCACACGC +AGCACTTGAAATAAATCGTTAACTTTAGGGGGCTGGATGTGCCCTGCATGGGGCAGTGCGGGGCTGAGCG +TCCTGAAAGCAAACAATCGGCAGTCTCCTCATGGTCCACCGCCAGACCTTGCCAGAACAACAAGGCCTTC +CCTTACCTATCGAGAGGGTAGTAGACATGGCCAGGAGCGCAGCCAGTGGTCGGGCGCCGGTCATGCGCTT +CTTTATTCCGAACTTGGACCGCGATTATTTCGCTCTTCCTGAATAGCGATTTATTTGATCGCCAGTAATC +ACATCATTGGGGAATGTTTC diff --git a/test/arg/TreeKnit/aln_c.fasta b/test/arg/TreeKnit/aln_c.fasta new file mode 100644 index 00000000..6638cd99 --- /dev/null +++ b/test/arg/TreeKnit/aln_c.fasta @@ -0,0 +1,160 @@ +>9_0 +ACCAAGTCGGTGTTGGGCGTAGGGCTGTGCTCCCAGTTTTTTACCTCAAAATAATTGGGGAATTGGATAT +TAACTATGCAAAGTAGTTCTAAAGTTTCGAATTCTTAGCCGCGTCAGAACGTAATGGCAAGTTGTCAGTA 
+TAGAGACAGCCTATCGCCCCCCGGAGTTTCGCGACGTATAGAAGGTGAGTCTAATTGGTCCAGTCCTAAA +ATGCTTCGCTGACTCTGCCTGACTATGCAGGTGATCTAAAACAAAATTAACTACCGGCGCGCATTAAGAG +ATAAACATCAGCGAGAGTGGGCGCACCGGAGTCGCATCGATACAGTCAAGACCGGTTATTTTTCATCGGA +GCACCGGACCAGCCAATATGCGCCTGTGCGCTCATGGACCTGCCTCTTCTTAAGCCTCCACGGTATGGGT +TAAGTGAGCCTTCGCGGCCTCATAGGCTTACGCCGGGCATACTGACATGACTTTAGTGTCACGGCCGCAT +ACTCGCCTTTCGCAAGGCGGCTTAGCGGGCTGCCTCTAGTTAATATACGTTCCCTCAAGCTAAAATATCA +CCAATGCAAAAGGTGCCATGCGTACCATTAATGCTACTTCCTGCGAAGGCCCCTCCATCGAGAATGCTCT +TCCTATATTAGAGTCACATGAACTCGCCTTGTTAGATGAGTACTTCAGTCAGCCTACAAACCCCTCGCGT +AGCCGCAGAACCATTGTGGCCACTGATCGCGGTTCAAACTTTGGTCTGATCGCAACTCAGTCGACAGCTT +AAACGGGTCAGGCGGACAGGCCCCACTGGTGCGGTTCTCAGAGGAGACCTAAGACGTTGTACCATACAAG +AATTCGGAGTAAGACGTGCGGACGAGTCTGATCCGGAAGCACCGATTAATTCTGAGGGGTAAGGGAGATT +CCACCTTCCGGAACCTCCGCAGAGGTCGCGCGTTACGGGATCTGGCACTGCACGCGTTTATTGCTTTTTT +ATGTCGTGTTCGCGGACCGA +>4_0 +ACCAAGTCGGTGTTGGGCGTAGGGCTGTGCTCCCAGTTTTTTACCTCAAAATAATTGGGGAATTGGATAT +TAACTATGCAAAGTAGTTCTAAAGTTTCGAATTCTTAGCCGCGTCAGAACGTAATGGCAAGTTGTCAGTA +TAGAGACAGCCTATCGCCCCCCGGAGTTTCGCGACGTATAGAAGGTGAGTCTAATTGGTCCAGTCCTAAA +ATGCTTCGCTGACTCTGCCTGACTATGCAGGTGATCTAAAACAAAATTAACTACCGGCGCGCATTAAGAG +ATAAACATCAGCGAGAGTGGGCGCACCGGAGTCGCATCGATACAGTCAAGACCGGTTATTTTTCATCGGA +GCACCGGACCAGCCAATATGCGCCTGTGCGCTCATGGACCTGCCTCTTCTTAAGCCTCCACGGTATGGGT +TAAGTGAGCCTTCGCGGCCTCATAGGCTTACGCCGGGCATACTGACATGACTTTAGTGTCACCGCCGCAT +ACTCGCCTTTCGCAAGGCGGCTTAGCGGGCTGCCTCTAGTTAATATACGTTCCCTCAAGCTAAAATATCA +CCAATGCAAAAGGTGCCATGCGTACCATTAATGCTACTTCCTGCAAAGGCCCCTCCATCGAGAATGCTCT +TCCTATATTAGAGTCACATGAACTCGCCTTGTTAGATGAGTACTTCAGTCAGCCTACAAACCCCTCGCGT +AGCCGCAGAACCATTGTGGCCACTGATCGCGGTTCAAACTTTGGTCTGATCGCAACTCAGTCGACAGCTT +AAACGGGTCAGGCGGACAGGCCCCACTGGTGCGGTTCTCAGAGGAGACCTAAGACGTTGTACCATACAAG +AATTCGGAGTAAGACGTGCGGACGAGTCTGATCCGGAAGCACCGATTAATTCTGAGGGGTAAGGGAGATT +CCACCTTCCGGAACCTCCGCAGAGGTCGCGCGTTACGGGATCTGGCACTGCACGCGTTTATTGCTTTTTT +ATGTCGTGTTCGCGGACCGA +>10_0 
+ACCAAGTCGGTGTTGGGCGTAGGGCTGTGCTCCCAGTTTTTTACCTCAAAATAATTGGGGAATTGGATAT +TAACTATGCAAAGTAGTTCTAAAGTTTCGAATTCTTAGCCGCGTCAGAACGTAATGGCAAGTTGTCAGTA +TAGAGACAGCCTATCGCCCCCCGGAGTTTCGCGACGTATAGAAGGTGAGTCTAATTGGTCCAGTCCTAAA +ATGCTTCGCTGACTCTGCCTGACTATGCAGGTGATCTAAAACAAAATTAACTACCGGCGCGCATTAAGAG +ATAAACATCAGCGAGAGTGGGCGCACCGGAGTCGCATCGATACAGTCAAGACCGGTTATTTTTCATCGGA +GCACCGGACCAGCCAATATGCGCCTGTGCGCTCATGGACCTGCCTCTTCTTAAGCCTCCACGGTATGGGT +TAAGTGAGCCTTCGCGGCCTCATAGGCTTACGCCGGGCATACTGACATGACTTTAGTGTCACGGCCGCAT +ACTCGCCTTTCGCAAGGCGGCTTAGCGGGCTGCCTCTAGTTAATATACGTTCCCTCAAGCTAAAATATCA +CCAATGCAAAAGGTGCCATGCGTACCATTAATGCTACTTCCTGCAAAGGCCCCTCCATCGAGAATGCTCT +TCCTATATTAGAGTCACATGAACTCGCCTTGTTAGATGAGTACTTCAGTCAGCCTACAAACCCCTCGCGT +AGCCGCAGAACCATTGTGGCCACTGATCGCGGTTCAAACTTTGGTCTGATCGCAACTCAGTCGACAGCTT +AAACGGGTCAGGCGGACAGGCCCCACTGGTGCGGTTCTCAGAGGAGACCTAAGACGTTGTACCATACAAG +AATTCGGAGTAAGACGTGCGGACGAGTCTGATCCGGAAGCACCGATTAATTCTGAGGGGTAAGGGAGATT +CCACCTTCCGGAACCTCCGCAGAGGTCGCGCGTTACGGGATCTGGCACTGCACGCGTTTATTGCTTTTTT +ATGTCGTGTTCGCGGACCGA +>7_0 +ACCAAGTCGGTGTTGGGCGTAGGGCTGTGCTCCCAGTTTGTTACCTCAAAATAATTGGGGAATTGGATAT +TAACTATGCAAAGTAGTTCTAAAGTTTCGAATTCTTAGCCGCGTCAGAACGTAATGGCAAGTTGTCAGTA +TAGAGACAGCCTATCGCCCCCCGGAGTTTCGCGACGTATAGAAGGTGAGTCTAATTGGTCCAGTCCTAAA +ATGCTTCGCTGACTCTGCCTGACTATGCAGGTGATCTAAAACAAAATTAACTACCGGCGCGCATTAAGAG +ATAAACATCAGCGAGAGTGGGCGCACCGGAGTCGCATCGATACAGTCAAGACCGGTTATTTTTCATCGGA +GCACCGGACCAGCCAATATGCGCCTGTGCGCTCATGGACCTGCCTCTTCTTAAGCCTCCACGGTATGGGT +TAAGTGAGCCTTCGCGGCCTCATAGGCTTACGCCGGGCATACTGACATGACTTTAGTGTCACGGCCGCAT +ACTCGCCTTTCGCAAGGCGGCTTAGCGGGCTGCCTCTAGTTAATATACGTTCCCTCAAGCTAAAATATCA +CCAATGCAAAAGGTGCCATGCGTACCATTAATGCTACTTCCTGCAAAGGCCCCTCCATCGAGAATGCTCT +TCCTATATTAGAGTCACATGAACTCGCCTTGTTAGATGAGTACTTCAGTCAGCCTACAAACCCCTCGCGT +AGCCGCAGAACCATTGTGGCCACTGATCGCGGTTCAAACTTTGGTCTGATCGCAACTCAGTCGACAGCTT +AAACGGGTCAGGCGGACAGGCCCCACTGGTGCGGTTCTCAGAGGAGACCTAAGACGTTGTACCATACAAG +AATTCGGAGTAAGACGTGCGGACGAGTCTGATCCGGAAGCACCGATTAATTCTGAGGGGTAAGGGAGATT 
+CCACCTTCCGGAACCTCCGCAGAGGTCGCGCGTTACGGGATCTGGCACTGCACGCGTTTATTGCTTTTTT +ATGTCGTGTTCGCGGGCCGA +>8_0 +ACCAAGTCGGTGTTGGGCGTAGGGCTGTGCTCCCAGTTTTTTACCTCAAAATAATTGGGGAATTGGATAT +TAACTATGCAAAGTAGTTCTAAAGTTTCGAATTCTTAGCCGCGTCAGAACGTAATGGCAAGTTGTCAGTA +TAGAGACAGCCTATCGCCCCCCGGAGTTTCGCGACGTATAGAAGGTGAGTCTAATTGGTCCAGTCCTAAA +ATGCTTCGCTGACTCTGCCTGACTATGCAGGTGATCTAAAACAAAATTAACTACCGGCGCGCATTAAGAG +ATAAACATCAGCGAGAGTGGGCGCACCGGAGTCGCATCGATACAGTCAAGACCGGTTATTTTTCATCGGA +GCACCGGACCAGCCAATATGCGCCTGTGCGCTCATGGACCTGCCTCTTCTTAAGCCTCCACGGTATGGGT +TAAGTGAGCCTTCGCGGCCTCATAGGCTTACGCCGGGCATACTGACATGACTTTAGTGTCACGGCCGCAT +ACTCGCCTTTCGCAAGGCGGCTTAGCGGGCTGACTCTAGTTAATATACGTTCCCTCAAGCTAAAATATCA +CCAATGCAAAAGGTGCCATGCGTACCATTAATGCTACTTCCTGCAAAGGCCCCTCCATCGAGAATGCTCT +TCCTATATTAGAGTCGCATGAACTCGCCTTGTTAGATGAGTACTTCAGTCAGCCTACAAACACCTCGCGT +AGCCGCAGAACCATTGTGGCCACTGATCGCGGTTCAAACTTTGGTCTGATCGCAACTCAGTCGACAGCTT +AAACGGGTCAGGCGGACAGGCCCCACTGGTGCGGTTCTCAGAGGAGACCTAAGACGTTGTACCATACAAG +AATTCGGAGTAAGACGTGCGGACGAGTCTGATCCCGAAGCACCGATTAATTCTGAGGGGTAAGGGAGATT +CCACCTTCCGGAACCTCCGCAGCGGTCGCGCGTTACGGGATCTGGCACTGCACGCGTTTATTGCTTTTTT +ATGTCGTGTTCGCGGACCGA +>5_0 +ACCAAGTCGGTGTTGGGCGTAGGGCTGTGCTCCCAGTTTTTTACCTCAAAATAATTGGGGAATTGGATAT +TAACTATGCAAAGTAGTTCTAAAGTTTCGAATTCTTAGCCGCGTCAGAACGTAATGGCAAGTTGTCAGTA +TAGAGACAGCCTATCGCCCCCCGGAGTTTCGCGACGTATAGAAGGTGAGTCTAATTGGTCCAGTCCTAAA +ATGCTTCGCTGACTCTGCCTGACTATGCAGGTGATCTAAAACAAAATTAACTACCGGCGCGCATTAAGAG +ATAAACATCAGCGAGAGTGGGCGCACCGGAGTCGCATCGATACAGTCAAGACCGGTTATTTTTCATCGGA +GCACCGGACCAGCCAATATGCGCCTGTGCGCTCATGGACCTGCCTCTTCTTAAGCCTCCACGGTATGGGT +TAAGTGAGCCTTCGCGGCCTCATAGGCTTACGCCGGGCATACTGACATGACTTTAGTGTCACGGCCGCAT +ACTCGCCTTTCGCAAGGCGGCTTAGCGGGCTGACTCTAGTTAATATACGTTCCCTCAAGCTAAAATATCA +CCAATGCAAAAGGTGCCATGCGTACCATTAATGCTACTTCCTGCAAAGGCCCCTCCATCGAGAATGCTCT +TCCTATATTAGAGTCGCATGAACTCGCCTTGTTAGATGAGTACTTCAGTCAGCCTACAAACCCCTCGCGT +AGCCGCAGAACCATTGTGGCCACTGATCGCGGTTCAAACTTTGGTCTGATCGCAACTCAGTCGACAGCTT +AAACGGGTCAGGCGGACAGGCCCCACTGGTGCGGTTCTCAGAGGAGACCTAAGACGTTGTACCATACAAG 
+AATTCGGAGTAAGACGTGCGGACGAGTCTGATCCCGAAGCACCGATTAATTCTGAGGGGTAAGGGAGATT +CCACCTTCCGGAACCTCCGCAGCGGTCGCGCGTTACGGGATCTGGCACTGCACGCGTTTATTGCTTTTTT +ATGTCGTGTTCGCGGACCGA +>3_0 +ACCAAGTCGGTGTTGGGCGTAGGGCTGTGCTCCCAGTTTTTTACCTCAAAATAATTGGGGAATTGGATAT +TAACTATGCAAAGTAGTTCTAAAGTTTCGAATTCTTAGCCGCGTCAGAACGTAATGGCAAGTTGTCAGTA +TAGAGACAGCCTATCGCCCCCCGGAGTTTCGCGACGTATAGAAGGTGAGTCTAATTGGTCCAGTCCTAAA +ATGCTTCGCTGACTCTGCCTGACTATGCAGGTGATCTAAAACAAAATTAACTACCGGCGCGCATTAAGAG +ATAAACATCAGCGAGAGTGGGCGCACCGGAGTCGCATCGATACAGTCAAGACCGGTTATTTTTCATCGGA +GCACCGGACCAGCCAATATGCGCCTGTGCGCTCATGGACCTGCCTCTTCTTAAGCCTCCACGGTATGGGT +TAAGTGAGCCTTCGCGGCCTCATAGGCTTACGCCGGGCATACTGACATGACTTTAGTGTCACGGCCGCAT +ACTCGCCTTTCGCAAGGCGGCTTAGCGGGCTGACTCTAGTTAATATACGTTCCCTCAAGCTAAAATATCA +CCAATGCAAAAGGTGCCATGCGTACCATTAATGCTACTTCCTGCAAAGGCCCCTCCATCGAGAATGCTCT +TCCTATATTAGAGTCGCATGAACTCGCCTTGTTAGATGAGTACTTCAGTCAGCCTACAAACCCCTCGCGT +AGCCGCAGAACCATTGTGGCCACTGATCGCGGTTCAAACTTTGGTCTGATCGCAACTCAGTCGACAGCTT +AAACGGGTCAGGCGGACAGGCCCCACTGGTGCGGTTCTCAGAGGAGACCTAAGACGTTGTACCATACAAG +AATTCGGAGTAAGACGTGCGGACGAGTCTGATCCCGAAGCACCGATTAATTCTGAGGGGTAAGGGAGATT +CCACCTTCCGGAACCTCCGCAGCGGTCGCGCGTTACGGGATCTGGCACTGCACGCGTTTATTGCTTTTTT +ATGTCGTGTTCGCGGACCGA +>1_0 +ACCAAGTCGGTGTTGGGCGTAGGGCTGTGCTCCCAGTTTTTTACCTCAAAATAATTGGGGAATTGGATAT +TAACTATGCAAAGTAGTTCTAAAGTTTCGAATTCTTAGCCGCGTCAGAACGTAATGGCAAGTTGTCAGTA +TAGAGACAGCCTATCGCCCCCCGGAGTTTCGCGACGTATAGAAGGTGAGTCTAATTGGTCCAGTCCTAAA +ATGCTTCGCTGACTCTGCCTGACTATGCAGGTGATCTAAAACAAAATTAACTACCGGCGCGCATTAAGAG +ATAAACATCAGCGAGAGTGGGCGCACCGGAGTCGCATCGATACAGTCAAGACCGGTTATTTTTCATCGGA +GCACCGGACCAGCCAATATGCGCCTGTGCGCTCATGGACCTGCCTCTTCTTAAGCCTCCACGGTATGGGT +TAAGTGTGCCTTCGCGGCCTCATAGGCTTACGCCGGGCATACTGACATGACTTTAGTGTCACGGCCGCAT +ACTCGCCTTTCGCAAGGCGGCTTAGCGGGCTGACTCTAGTTAATATACGTTCCCTCAAGCTAAAATATCA +CCAATGCAAAAGGTGCCATGCGTACCATTAATGCTACTTCCTGCAAAGGCCCCTCCATCGAGAATGCTCT +TCCTATATTAGAGTCGCATGAACTCGCCTTGTTAGATGAGTACTTCAGTCAGCCTAGAAACCCCTCGCGT +AGCCGCAGAACCATTGTGGCCACTGATCGCGGTTCAAACGTTGGTCTGATCGCAACTCAGTCGACAGCTT 
+AAACGGGTCAGGCGGACAGGCCCCACTGGTGCGGTTCTCAGAGGAGACCTAAGACGTTGTACCATACAAG +AATTCGGAGTAAGACGTGCGGACGAGTCTGATCCGGAAGCACCGATTAATTCTGAGGGGTAAGGGAGATT +CCACCTTCCGGAACCTCCGCAGAGGTCGCGCGTTACGGGATCTGGCACTGCACGCGTTTATTGCTTTTTT +ATGTCGTGTTCGCGGACCGA +>6_0 +ACCAAGTCGGTGTTGGGCGTAGGGCTGTGCTCCCAGTTTTTTACCTCAAAATAATTGGGGAATTGGATAT +TAACTATGCAAAGTAGTTCTAAAGTTTCGAATTCTTAGCCGCGTCAGAACGTAATGGCAAGTTGTCAGTA +TAGAGACAGCCTATCGCCCCCCGGAGTTTCGCGACGTATAGAAGGTGAGTCTAATTGGTCCAGTCCTAAA +ATGCTTCGCTGACTCTGCCTGACTATGCAGGTGATCTAAAACAAAATTAACTACCGGCGCGCATTAAGAG +ATAAACATCAGCGAGAGTGGGCGCACCGGAGTCGCATCGATACAGTCAAGACCGGTTATTTTTCATCGGA +GCACCGGACCAGCCAATATGCGCCTGTGCGCTCATGGACCTGCCTCTTCTTAAGCCTCCACGGTATGGGT +TAAGTGTGCCTTCGCGGCCTCATAGGCTTACGCCGGGCATACTGACATGACTTTAGTGTCACGGCCGCAT +ACTCGCCTTTCGCAAGGCGGCTTAGCGGGCTGACTCTAGTTAATATACGTTCCCTCAAGCTAAAATATCA +CCAATGCAAAAGGTGCCATGCGTACCATTAATGCTACTTCCTGCAAAGGCCCCTCCATCGAGAATGCTCT +TCCTATATTAGAGTCGCATGAACTCGCCTTGTTAGATGAGTACTTCAGTCAGCCTAGAAACCCCTCGCGT +AGCCGCAGAACCATTGTGGCCACTGATCGCGGTTCAAACGTTGGTCTGATCGCAACTCAGTCGACAGCTT +AAACGGGTCAGGCGGACAGGCCCCACTGGTGCGGTTCTCAGAGGAGACCTAAGACGTTGTACCATACAAG +AATTCGGAGTAAGACGTGCGGACGAGTCTGATCCGGAAGCACCGATTAATTCTGAGGGGTAAGGGAGATT +CCACCTTCCGGAACCTCCGCAGAGGTCGCGCGTTACGGGATCTGGCACTGCACGCGTTTATTGCTTTTTT +ATGTCGTGTTCGCGGACCGA +>2_0 +ACCAAGTCGGTGTTGGGCGTAGGGCTGTGCTCCCAGTTTTTTACCTCAAAATAATTGGGGAATTGGATAT +TAACTATGCAAAGTAGTTCTAAAGTTTCGAATTCTTAGCCGCGTCAGAACGTAATGGCAAGTTGTCAGTA +TAGAGACAGCCTATCGCCCCCCGGAGTTTCGCGACGTATAGAAGGTGAGTCTAATTGGTCCAGTCCTAAA +ATGCTTCGCTGACTCTGCCTGACTATGCAGGTGATCTAAAACAAAATTAACTACCGGCGCGCATTAAGAG +ATAAACATCAGCGAGAGTGGGCGCACCGGAGTCGCATCGATACAGTCAAGACCGGTTATTTTTCATCGGA +GCACCGGACCAGCCAATATGCGCCTGTGCGCTCATGGACCTGCCTCTTCTTAAGCCTCCACGGTATGGGT +TAAGTGTGCCTTCGCGGCCTCATAGGCTTACGCCGGGCATACTGACATGACTTTAGTGTCACGGCCGCAT +ACTCGCCTTTCGCAAGGCGGCTTAGCGGGCTGACTCTAGTTAATATACGTTCCCTCAAGCTAAAATATCA +CCAATGCAAAAGGTGCCATGCGTACCATTAATGCTACTTCCTGCAAAGGCCCCTCCATCGAGAATGCTCT +TCCTATATTAGAGTCGCATGAACTCGCCTTGTTAGATGAGTACTTCAGTCAGCCTAGAAACCCCTCGCGT 
+AGCCGCAGAACCATTGTGGCCACTGATCGCGGTTCAAACGTTGGTCTGATCGCAACTCAGTCGACAGCTT +AAACGGGTCAGGCGGACAGGCCCCACTGGTGCGGTTCTCAGAGGAGACCTAAGACGTTGTACCATACAAG +AATTCGGAGTAAGACGTGCGGACGAGTCTGATCCGGAAGCACCGATTAATTCTGAGGGGTAAGGGAGATT +CCACCTTCCGGAACCTCCGCAGAGGTCGCGCGTTACGGGATCTGGCACTGCACGCGTTTATTGCTTTTTT +ATGTCGTGTTCGCGGACCGA diff --git a/test/arg/TreeKnit/metadata.csv b/test/arg/TreeKnit/metadata.csv new file mode 100644 index 00000000..888cf3bb --- /dev/null +++ b/test/arg/TreeKnit/metadata.csv @@ -0,0 +1,11 @@ +name,date +1_0,2000.01 +6_0,2000.01 +2_0,2000.01 +8_0,2000.01 +5_0,2000.01 +7_0,2000.01 +4_0,2000.01 +10_0,2000.01 +3_0,2000.01 +9_0,2000.01 diff --git a/test/arg/TreeKnit/tree_a_resolved.nwk b/test/arg/TreeKnit/tree_a_resolved.nwk index eff6a0ee..83856060 100644 --- a/test/arg/TreeKnit/tree_a_resolved.nwk +++ b/test/arg/TreeKnit/tree_a_resolved.nwk @@ -1 +1 @@ -((3,1,10,(4,7)),(2,6),(5,8,9)); \ No newline at end of file +(((1_0:0.00042160595317589734,(6_0:1.0808942097513535e-5,2_0:1.0808942097513535e-5)internal_1:0.00041079701107838383)internal_4:0.004424706951683469,(8_0:0.0008463505955400933,5_0:0.0008463505955400933)internal_9:0.003999962309319273)internal_14:0.00039448806504147895,(((7_0:0.000593105927716494,(4_0:3.1975715213597586e-5,10_0:3.1975715213597586e-5)internal_2:0.0005611302125028965)internal_6:5.255445800663714e-5,3_0:0.0006456603857231312)internal_7:0.0007718518012316954,9_0:0.0014175121869548265)internal_11:0.003823288782946018)internal_15:0; \ No newline at end of file diff --git a/test/arg/TreeKnit/tree_b_resolved.nwk b/test/arg/TreeKnit/tree_b_resolved.nwk index eeb63d4f..c0507fc4 100644 --- a/test/arg/TreeKnit/tree_b_resolved.nwk +++ b/test/arg/TreeKnit/tree_b_resolved.nwk @@ -1 +1 @@ -(((1,10),3),(((4,7),(2,6)),((5,9),8))); \ No newline at end of file 
+((((9_0:0.0007157734425760744,(4_0:3.1975715213597586e-5,10_0:3.1975715213597586e-5)internal_2:0.0006837977273624769)internal_8:0.0007017387443787521,7_0:0.0014175121869548265)internal_11:0.0024284388519823638,((8_0:0.0008463505955400933,5_0:0.0008463505955400933)internal_9:5.698925281919542e-5,3_0:0.0009033398483592889)internal_10:0.0029426111905779015)internal_13:0.0013948499309636548,(1_0:0.00042160595317589734,(6_0:1.0808942097513535e-5,2_0:1.0808942097513535e-5)internal_1:0.00041079701107838383)internal_4:0.004819195016724947)internal_15:0; \ No newline at end of file diff --git a/test/arg/TreeKnit/tree_c_resolved.nwk b/test/arg/TreeKnit/tree_c_resolved.nwk index 3c840a17..c0507fc4 100644 --- a/test/arg/TreeKnit/tree_c_resolved.nwk +++ b/test/arg/TreeKnit/tree_c_resolved.nwk @@ -1 +1 @@ -((((1,10),(4,7)),3),(2,6),((5,9),8)); \ No newline at end of file +((((9_0:0.0007157734425760744,(4_0:3.1975715213597586e-5,10_0:3.1975715213597586e-5)internal_2:0.0006837977273624769)internal_8:0.0007017387443787521,7_0:0.0014175121869548265)internal_11:0.0024284388519823638,((8_0:0.0008463505955400933,5_0:0.0008463505955400933)internal_9:5.698925281919542e-5,3_0:0.0009033398483592889)internal_10:0.0029426111905779015)internal_13:0.0013948499309636548,(1_0:0.00042160595317589734,(6_0:1.0808942097513535e-5,2_0:1.0808942097513535e-5)internal_1:0.00041079701107838383)internal_4:0.004819195016724947)internal_15:0; \ No newline at end of file diff --git a/test/arg/aln_a.fasta b/test/arg/aln_a.fasta deleted file mode 100644 index 308dbefa..00000000 --- a/test/arg/aln_a.fasta +++ /dev/null @@ -1,160 +0,0 @@ ->1 -ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC -GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG 
-GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC -GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->2 -ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC -GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG -GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC -GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->10 -ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC -GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA 
-ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG -GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC -GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->4 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC -GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->5 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC -GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT 
-GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->6 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC -GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAGACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->7 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC 
-GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACAGCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACGGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGCC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCTCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTGCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGCCG ->8 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC -GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTACTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGCCAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATCCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->9 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG 
-CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC -GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTACTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAACCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCTCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCACGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->3 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC -GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTAGTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTAAGGACGGCA -ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATCCGCTC -GACCCACCGGGCGCCGAATTTAACTCTCTAATGTTACTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG \ No 
newline at end of file diff --git a/test/arg/aln_b.fasta b/test/arg/aln_b.fasta deleted file mode 100644 index 308dbefa..00000000 --- a/test/arg/aln_b.fasta +++ /dev/null @@ -1,160 +0,0 @@ ->1 -ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC -GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG -GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC -GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->2 -ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC -GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG -GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC 
-GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->10 -ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC -GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG -GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC -GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->4 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC -GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG 
-GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->5 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC -GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->6 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC -GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA 
-GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAGACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->7 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC -GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACAGCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACGGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGCC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCTCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTGCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGCCG ->8 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC -GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTACTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGCCAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG 
-GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATCCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->9 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC -GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTACTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAACCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCTCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCACGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->3 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC -GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTAGTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTAAGGACGGCA -ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG 
-TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATCCGCTC -GACCCACCGGGCGCCGAATTTAACTCTCTAATGTTACTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG \ No newline at end of file diff --git a/test/arg/aln_c.fasta b/test/arg/aln_c.fasta deleted file mode 100644 index 308dbefa..00000000 --- a/test/arg/aln_c.fasta +++ /dev/null @@ -1,160 +0,0 @@ ->1 -ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC -GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG -GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC -GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->2 -ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC 
-GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG -GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC -GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->10 -ATGCATGCACCAGAAGGGCGGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGGCAGACAAAGAC -GAATTGGTGCAATATAGCTATATATCCTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGGGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCTACCTCCATAGTCCGTGCTACACGCCGG -GGTCAGCGCGCATCGCGTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTATATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGCATCTCTGCGTTATCATACCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGGCCAAGATGAAATTCTAAAGTTAGCC -GAGGTGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->4 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG 
-CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC -GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->5 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC -GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->6 
-ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAATAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGCCAAAGAC -GAATTGGTGCAATATAGCTATAGAGACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGACGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTATAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGGC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGGGCAGCGGTCTGCGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTCTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAGACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->7 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC -GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTTCTTTGCATGATTTT -GAAGGACAGCTCTTCGCGGATCGCCCGAGATAGATTATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATTCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACGGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTAGCC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCTCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTGCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT 
-GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGCCG ->8 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC -GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTACTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGCCAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATCCGCTC -GACCCACCTGGCGCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->9 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC -GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTACTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTCAGGACGGCA -ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAACCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATGCGCTC -GACCCACCTGGCTCCGAATTTAACTCTCTAATGTTTCTCACGTTATAACACGACGCACGGTATACCTAAC 
-GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG ->3 -ATGCATGCACCAGAAGGGCCGCGTCCGTCGACCCGTACAAGTACTGGTCGCGTCGGCGAACAGCTTCATG -CCTGTCGTTCCTCGTTGCCGAAGAACGGCGCCCTATTAGGCGAGCCTGTCCAAAGATGACAGACAAAGAC -GAATTGGTGCAATATAGCTATATATACTTTTGGGCCCACTTTATAGGTCTCAACTAGTTTGCATGATTTT -GAAGGACACCTCTTCGCGGATCGCCCGAGATAGATGATAGATGAATATCAAGCTCATCTTAAGGACGGCA -ATCCTAGGGAGCAGTAGCATGGCATCATGATATGGAGGCAGCAACCTCCATATTCCGTGCTACACGCCGG -GGTCGGCGCGCATCGCTTCATAATTGTGGAACATTGACCAGGGTGATTGGCATGTACAACGTCATGACAG -TATAAAAACCCATGCCGTAGCAGATAAATGTGCTCTATGATAAGTGTCGTGACGATAACGTATGGGCGCG -GTAACGCTCTTAGAATTGTGCCACTATAAACCGGGTATCACTGCGTTATCATCCCTACAAATCTTCCAAA -GGAAATCTTCCCGTGGAATACTGGTAGCCACACCGCTTCATTCTAAGTTAAAGAGACGTTATGATAATTG -GGCGACTTAGAGGGTCCCCTTCTAATCCAAATCTATTGTCAATGTCCAAGATGAAATTCTAAAGTTCGCC -GAGGAGTTTCTCGAACCAACGTAGAGTCAGTGCATCGGTCTGGGGCGCGTTTGGTCCTTGAAATCCGCTC -GACCCACCGGGCGCCGAATTTAACTCTCTAATGTTACTCACGTTATAACACGACGCCCGGTATACCTAAC -GTCCCCCATTAGCAACTTACGATGCGCCCTAGACAATATGCATGAAAATAGTAATGCCTTACCTACGCCT -GTAAACAAGTCCGGCTAATCTGCGAATACGCTTTGGGGATAGGACTGCCCATTGCAATGACTCTACGTAA -CTCCTATATTTTCTTTGTCG \ No newline at end of file diff --git a/test/arg/metadata.csv b/test/arg/metadata.csv deleted file mode 100644 index eb2d74b9..00000000 --- a/test/arg/metadata.csv +++ /dev/null @@ -1,11 +0,0 @@ -name, date -1,2015.30 -2,2015.30 -3,2015.30 -4,2015.30 -5,2015.30 -6,2015.30 -7,2015.30 -8,2015.30 -9,2015.30 -10,2015.30 \ No newline at end of file diff --git a/test/command_line_tests.sh b/test/command_line_tests.sh index e6397952..d4aff7ef 100755 --- a/test/command_line_tests.sh +++ b/test/command_line_tests.sh @@ -54,6 +54,14 @@ else echo "timetree_inference on vcf data failed $retval" fi +treetime arg --trees arg/TreeKnit/tree_a_resolved.nwk arg/TreeKnit/tree_b_resolved.nwk arg/TreeKnit/tree_c_resolved.nwk --alignments arg/TreeKnit/aln_a.fasta arg/TreeKnit/aln_b.fasta 
arg/TreeKnit/aln_c.fasta --mccs arg/TreeKnit/MCCs.json --dates arg/TreeKnit/metadata.csv --clock-rate 0.0028 --outdir time_tree_arg_results +retval="$?" +if [ "$retval" == 0 ]; then + echo "timetree arg on 3 trees ok" +else + ((all_tests++)) + echo "timetree arg on 3 trees failed $retval" +fi if [ "$all_tests" == 0 ];then echo "All tests passed" diff --git a/test/test_arg.py b/test/test_arg.py index 825fd125..51b61404 100644 --- a/test/test_arg.py +++ b/test/test_arg.py @@ -19,8 +19,8 @@ def test_assign_mccs(): from treetime.utils import parse_dates tree = Phylo.read('test/arg/TreeKnit/tree_a_resolved.nwk', 'newick') - tt = TreeTime(dates=parse_dates("test/arg/metadata.csv"), tree=tree, - aln="test/arg/aln_a.fasta", gtr='JC69', alphabet='nuc', verbose=True, + tt = TreeTime(dates=parse_dates("test/arg/TreeKnit/metadata.csv"), tree=tree, + aln="test/arg/TreeKnit/aln_a.fasta", gtr='JC69', alphabet='nuc', verbose=True, fill_overhangs=True, keep_node_order=True, compress=False) @@ -29,12 +29,14 @@ def test_assign_mccs(): MCC_locs = [frozenset(["tree_a", "tree_b"]), frozenset(["tree_a", "tree_c"])] MCCs = [MCC_dict[loc] for loc in MCC_locs] leaf_to_MCC = arg.get_mcc_map(MCCs) - assert leaf_to_MCC == {'3': [0, 0], '6': [1, 1], '8': [2, 2], '4': [3, 3], '7': [3, 3], '1': [4, 3], '2': [4, 3], '5': [4, 3], '9': [4, 3], '10': [4, 3]} + assert leaf_to_MCC == {'3_0': [0, 0], '10_0': [1, 1], '4_0': [1, 1], '5_0': [2, 2], '8_0': [2, 2], '1_0': [3, 3], '2_0': [3, 3], '6_0': [3, 3], '7_0': [3, 3], '9_0': [3, 3]} arg.assign_all_mccs(tt.tree, len(MCCs), leaf_to_MCC, tt.one_mutation) - assert all([tt.tree.root.mcc == c.mcc for c in tt.tree.root.clades]) - assert sorted([c.mcc for c in tt.tree.root.clades[0]]) == sorted([[0, 0], [3,3], [4, 3], [4,3]]) - assert sorted([c.mcc for c in tt.tree.root.clades[1]]) == sorted([[1, 1], [4, 3]]) - assert sorted([c.mcc for c in tt.tree.root.clades[2]]) == sorted([[2, 2], [4, 3], [4,3]]) + for node in tt.tree.find_clades(): + assert node.mcc[0] == 
node.mcc[1] + if set([c.name for c in node.clades]) == set(["4_0", "10_0"]): + node.mcc == [1,1] + if set([c.name for c in node.clades]) == set(["8_0", "5_0"]): + node.mcc == [2,2] test_assign_mccs() @@ -43,13 +45,44 @@ def test_parse_args(): import numpy as np tree_nwk_files = ['test/arg/TreeKnit/tree_a_resolved.nwk','test/arg/TreeKnit/tree_b_resolved.nwk', 'test/arg/TreeKnit/tree_c_resolved.nwk'] - aln_files = ['test/arg/aln_a.fasta','test/arg/aln_b.fasta', 'test/arg/aln_c.fasta'] + aln_files = ['test/arg/TreeKnit/aln_a.fasta','test/arg/TreeKnit/aln_b.fasta', 'test/arg/TreeKnit/aln_c.fasta'] MCC_file = 'test/arg/TreeKnit/MCCs.json' dict_ = arg.parse_arg(tree_nwk_files, aln_files, MCC_file, fill_overhangs=True) - assert sum(dict_["masks"][frozenset(["tree_a"])]) == sum(dict_["masks"][frozenset(["tree_b"])]) == sum(dict_["masks"][frozenset(["tree_c"])]) ==1000 - assert all(dict_["masks"][frozenset(["tree_a"])] == np.concatenate((np.ones(1000), np.zeros(2000)))) - assert all(dict_["masks"][frozenset(["tree_a", "tree_b"])] == np.concatenate((np.ones(2000), np.zeros(1000)))) - assert all(dict_["masks"][frozenset(["tree_a", "tree_b", "tree_c"])] == np.ones(3000)) + assert sum(dict_["masks_dict"][frozenset(["tree_a"])]) == sum(dict_["masks_dict"][frozenset(["tree_b"])]) == sum(dict_["masks_dict"][frozenset(["tree_c"])]) ==1000 + assert all(dict_["masks_dict"][frozenset(["tree_a"])] == np.concatenate((np.ones(1000), np.zeros(2000)))) + assert all(dict_["masks_dict"][frozenset(["tree_a", "tree_b"])] == np.concatenate((np.ones(2000), np.zeros(1000)))) + assert all(dict_["masks_dict"][frozenset(["tree_a", "tree_b", "tree_c"])] == np.ones(3000)) -test_parse_args() \ No newline at end of file +test_parse_args() + +def test_setup_arg(): + from treetime import arg + from treetime.utils import parse_dates + import numpy as np + + tree_nwk_files = ['test/arg/TreeKnit/tree_a_resolved.nwk','test/arg/TreeKnit/tree_b_resolved.nwk', 'test/arg/TreeKnit/tree_c_resolved.nwk'] + aln_files 
= ['test/arg/TreeKnit/aln_a.fasta','test/arg/TreeKnit/aln_b.fasta', 'test/arg/TreeKnit/aln_c.fasta'] + MCC_file = 'test/arg/TreeKnit/MCCs.json' + dates = parse_dates("test/arg/TreeKnit/metadata.csv") + + dict_ = arg.parse_arg(tree_nwk_files, aln_files, MCC_file, fill_overhangs=True) + + ##check if arg is set up correctly on tree_b + masked_tree_b = arg.setup_arg(dict_["trees_dict"], dict_["alignment"], dates, dict_["MCCs_dict"], dict_["masks_dict"], "tree_b", gtr='JC69', + verbose=0, fill_overhangs=True, reroot=False, fixed_clock_rate=0.001, alphabet='nuc') + + node_dict = {} + for node in masked_tree_b.tree.find_clades(): + node_dict[node.name] = node + for node in masked_tree_b.tree.find_clades(): + if node.name == "3_0" or set([c.name for c in node.clades]) == set(["4_0", "10_0"]) or set([c.name for c in node.clades]) == set(["3_0", "internal_9"]) or set([c.name for c in node.clades]) == set(["8_0", "5_0"]): + assert all(node.mask == np.concatenate((np.zeros(1000), np.ones(2000)))) + elif node.name in set([c.name for c in masked_tree_b.tree.root.clades]): + assert all(node.mask == np.concatenate((np.ones(2000), np.zeros(1000)))) + elif not node.up: + assert all(node.mask == np.concatenate((np.zeros(1000), np.ones(1000), np.zeros(1000)))) + else: + assert all(node.mask == np.ones(3000)) + +test_setup_arg() diff --git a/treetime/arg.py b/treetime/arg.py index beebce10..b41e1e99 100644 --- a/treetime/arg.py +++ b/treetime/arg.py @@ -23,7 +23,7 @@ def get_MCC_dict(MCC_file): return MCC_dict -def get_mask(length_segments, tree_names): +def get_mask_dict(length_segments, tree_names): pos_list = [0] for l in length_segments: new = pos_list[-1] + l @@ -64,10 +64,12 @@ def parse_arg(tree_files, aln_files, MCC_file, fill_overhangs=True): tree_names = get_tree_names(tree_files) trees_in_dict = set().union(*MCC_dict.keys()) assert(all([k in trees_in_dict for k in tree_names])) - trees = [Phylo.read(t, 'newick') for t in tree_files] + trees_dict = {} + for i in range(0, 
len(tree_files)): + trees_dict[tree_names[i]] = Phylo.read(tree_files[i], 'newick') # determine common terminal nodes - all_leaves = set.intersection(*[set([x.name for x in t.get_terminals()]) for t in trees]) + all_leaves = set.intersection(*[set([x.name for x in t.get_terminals()]) for (k, t) in trees_dict.items()]) # read alignments and construct edge modified sequence arrays alignments = [{s.id:s for s in AlignIO.read(aln, 'fasta')} for aln in aln_files] @@ -88,12 +90,12 @@ def parse_arg(tree_files, aln_files, MCC_file, fill_overhangs=True): # construct masks for the concatenation and the two segments l = [len(a[leaf]) for a in alignments] - masks = get_mask(l, tree_names) + masks = get_mask_dict(l, tree_names) - return {"MCCs": MCC_dict, "trees":trees, "alignment":MultipleSeqAlignment(aln_combined), - "masks":masks} + return {"MCCs_dict": MCC_dict, "trees_dict":trees_dict, "alignment":MultipleSeqAlignment(aln_combined), + "masks_dict":masks} -def setup_arg(T, aln, dates, MCCs, masks, gtr='JC69', +def setup_arg(trees_dict, alignments, dates, MCCs_dict, masks_dict, tree_name, gtr='JC69', verbose=0, fill_overhangs=True, reroot=True, fixed_clock_rate=None, alphabet='nuc', **kwargs): """construct a TreeTime object with the appropriate masks on each node for branch length optimization with full or segment only alignment. 
@@ -115,8 +117,20 @@ def setup_arg(T, aln, dates, MCCs, masks, gtr='JC69', """ from treetime import TreeTime + T= trees_dict[tree_name] ##desired tree + + ##get list of MCCs of all trees with T and the order of these trees + MCCs = [] + tree_order = {} + i = 0 + for t in trees_dict.keys(): + if t != tree_name: + tree_order[i] = t + MCCs.append(MCCs_dict[frozenset([tree_name, t])]) + i +=1 + tt = TreeTime(dates=dates, tree=T, - aln=aln, gtr=gtr, alphabet=alphabet, verbose=verbose, + aln=alignments, gtr=gtr, alphabet=alphabet, verbose=verbose, fill_overhangs=fill_overhangs, keep_node_order=True, compress=False, **kwargs) @@ -131,9 +145,11 @@ def setup_arg(T, aln, dates, MCCs, masks, gtr='JC69', # assign masks to branches whenever child and parent are in the same MCC for n in tt.tree.find_clades(): - shared = [(n.mcc[pos] is not None) and n.up and n.up.mcc[pos]==n.mcc[pos] for pos in len(MCCs)] - pos_shared = frozenset([i for i, x in enumerate(shared) if x]) - n.mask = masks[pos_shared] + shared = [(n.mcc[pos] is not None) and n.up and n.up.mcc[pos]==n.mcc[pos] for pos in range(len(MCCs))] + ##use tree_order to convert position in MCC list to tree_names and see which trees share this branch and assign a proper mask + pos_shared = [tree_order[i] for i, x in enumerate(shared) if x] + pos_shared.append(tree_name) + n.mask = masks_dict[frozenset(pos_shared)] return tt diff --git a/treetime/argument_parser.py b/treetime/argument_parser.py index ae4aef75..6734f792 100644 --- a/treetime/argument_parser.py +++ b/treetime/argument_parser.py @@ -308,8 +308,8 @@ def toplevel(params): description="Calculates the root-to-tip regression and quantifies the 'clock-i-ness' of the tree. 
" "It will reroot the tree to maximize the clock-like " "signal and recalculate branch length unless run with --keep_root.") - arg_parser.add_argument('--trees', nargs=2, required=True, type=str) - arg_parser.add_argument('--alignments', nargs=2, required=True, type=str) + arg_parser.add_argument('--trees', nargs='+', required=True, type=str) + arg_parser.add_argument('--alignments', nargs='+', required=True, type=str) arg_parser.add_argument('--mccs', required=True, type=str) add_timetree_args(arg_parser) add_time_arguments(arg_parser) @@ -318,7 +318,18 @@ def toplevel(params): add_reroot_group(arg_parser) add_common_args(arg_parser) - arg_parser.set_defaults(func=arg_time_trees) + + def toplevel_arg(params): + if len(params.trees) <2 or len(params.alignments) <2: + print(treetime_description+timetree_description+subcommand_description+ + "'arg' requires at least two tree and alignment files.\n") + elif len(params.trees) != len(params.alignments): + print(treetime_description+timetree_description+subcommand_description+ + "'arg' requires the same number of tree and alignment files, it is important that these are given in the same order.\n") + else: + arg_time_trees(params) + + arg_parser.set_defaults(func=toplevel_arg) # make a version subcommand diff --git a/treetime/wrappers.py b/treetime/wrappers.py index 77a973ba..f4e977fc 100644 --- a/treetime/wrappers.py +++ b/treetime/wrappers.py @@ -484,22 +484,22 @@ def arg_time_trees(params): """ from .arg import parse_arg, setup_arg - arg_params = parse_arg(params.trees[0], params.trees[1], - params.alignments[0], params.alignments[1], params.mccs, + arg_params = parse_arg(params.trees, + params.alignments, params.mccs, fill_overhangs=not params.keep_overhangs) dates = utils.parse_dates(params.dates, date_col=params.date_column, name_col=params.name_column) root = None if params.keep_root else params.reroot - for i,(tree,mask) in enumerate(zip(arg_params['trees'], arg_params['masks'])): + for tree_name in 
arg_params['trees_dict'].keys(): outdir = get_outdir(params, f'_ARG-treetime') gtr = create_gtr(params) - tt = setup_arg(tree, arg_params['alignment'], arg_params['combined_mask'], mask, dates, arg_params['MCCs'], - gtr=gtr, verbose=params.verbose, fill_overhangs=not params.keep_overhangs, + tt = setup_arg(arg_params['trees_dict'], arg_params['alignment'], dates, arg_params['MCCs_dict'], arg_params['masks_dict'], + tree_name, gtr=gtr, verbose=params.verbose, fill_overhangs=not params.keep_overhangs, fixed_clock_rate = params.clock_rate, reroot=root) - - run_timetree(tt, params, outdir, tree_suffix=f"_{i+1}", prune_short=False, method_anc=params.method_anc) + + run_timetree(tt, params, outdir, tree_suffix=f"_"+tree_name, prune_short=False, method_anc=params.method_anc) From e43ce0946a42e963de032ad6af7a7124f56a5691 Mon Sep 17 00:00:00 2001 From: Richard Neher Date: Mon, 15 Aug 2022 11:37:21 +0200 Subject: [PATCH 04/12] chore: rename variables, remove unused code --- treetime/arg.py | 81 +++++++++++++------------------------------------ 1 file changed, 21 insertions(+), 60 deletions(-) diff --git a/treetime/arg.py b/treetime/arg.py index b41e1e99..6ab52d00 100644 --- a/treetime/arg.py +++ b/treetime/arg.py @@ -14,7 +14,7 @@ def get_tree_names(tree_nwk_files): raise Exception("Error: Tree names must be unique, see TreeKnit output format.") return tree_names -def get_MCC_dict(MCC_file): +def get_MCC_dict(MCC_file): f = open(MCC_file) data = json.load(f) MCC_dict = {} @@ -95,6 +95,7 @@ def parse_arg(tree_files, aln_files, MCC_file, fill_overhangs=True): return {"MCCs_dict": MCC_dict, "trees_dict":trees_dict, "alignment":MultipleSeqAlignment(aln_combined), "masks_dict":masks} + def setup_arg(trees_dict, alignments, dates, MCCs_dict, masks_dict, tree_name, gtr='JC69', verbose=0, fill_overhangs=True, reroot=True, fixed_clock_rate=None, alphabet='nuc', **kwargs): """construct a TreeTime object with the appropriate masks on each node @@ -134,7 +135,6 @@ def 
setup_arg(trees_dict, alignments, dates, MCCs_dict, masks_dict, tree_name, g fill_overhangs=fill_overhangs, keep_node_order=True, compress=False, **kwargs) - if reroot: tt.reroot("least-squares", force_positive=True, clock_rate=fixed_clock_rate) @@ -145,56 +145,16 @@ def setup_arg(trees_dict, alignments, dates, MCCs_dict, masks_dict, tree_name, g # assign masks to branches whenever child and parent are in the same MCC for n in tt.tree.find_clades(): - shared = [(n.mcc[pos] is not None) and n.up and n.up.mcc[pos]==n.mcc[pos] for pos in range(len(MCCs))] + shared = [(n.mcc[other_tree] is not None) and n.up and n.up.mcc[other_tree]==n.mcc[other_tree] + for other_tree in range(len(MCCs))] ##use tree_order to convert position in MCC list to tree_names and see which trees share this branch and assign a proper mask - pos_shared = [tree_order[i] for i, x in enumerate(shared) if x] - pos_shared.append(tree_name) - n.mask = masks_dict[frozenset(pos_shared)] - + branch_shared = [tree_order[i] for i, x in enumerate(shared) if x] + branch_shared.append(tree_name) + n.mask = masks_dict[frozenset(branch_shared)] return tt -def assign_mccs(tree, mcc_map, one_mutation=1e-4): - """Assign MCCs to all terminal and internal branches of the tree. - - Args: - tree (Bio.Phylo.Tree): tree - mcc_map (dict): map from leaf to mcc - one_mutation (float, optional): minimal length of branches. Defaults to 1e-4. 
- """ - # assign MCCs to leaves - for leaf in tree.get_terminals(): - leaf.child_mccs = set([mcc_map[leaf.name]]) - leaf.mcc = mcc_map[leaf.name] - leaf.branch_length = max(0.5*one_mutation, leaf.branch_length) - - # reconstruct MCCs with Fitch algorithm - for n in tree.get_nonterminals(order='postorder'): - common_mccs = set.intersection(*[c.child_mccs for c in n]) - n.branch_length = max(0.5*one_mutation, n.branch_length) - if len(common_mccs): - n.child_mccs = common_mccs - else: - n.child_mccs = set.union(*[c.child_mccs for c in n]) - - mcc_intersection = set.intersection(*[c.child_mccs for c in tree.root]) - if len(mcc_intersection): - tree.root.mcc = list(mcc_intersection)[0] - else: - tree.root.mcc = None - - for n in tree.get_nonterminals(order='preorder'): - if n==tree.root: - continue - else: - if n.up.mcc in n.child_mccs: # parent MCC part of children -> that is the MCC - n.mcc = n.up.mcc - elif len(n.child_mccs)==1: # child is an MCC - n.mcc = list(n.child_mccs)[0] - else: # no unique child MCC and no match with parent -> not part of an MCCs - n.mcc = None - def get_mcc_map(MCCs_list): # make a lookup for the MCCs and assign to trees leaf_to_MCC = {} @@ -207,6 +167,7 @@ def get_mcc_map(MCCs_list): leaf_to_MCC[leaf].append(mi) return leaf_to_MCC + def assign_all_mccs(tree, len_tree_list, mcc_map, one_mutation=1e-4): for leaf in tree.get_terminals(): leaf.child_mccs = [set([mcc_map[leaf.name][pos]]) for pos in range(len_tree_list)] @@ -217,16 +178,16 @@ def assign_all_mccs(tree, len_tree_list, mcc_map, one_mutation=1e-4): common_mccs = [set.intersection(*[c.child_mccs[pos] for c in n]) for pos in range(len_tree_list)] n.branch_length = max(0.5*one_mutation, n.branch_length) n.child_mccs = [] - for pos in range(len_tree_list): - if len(common_mccs[pos]): - n.child_mccs.append(common_mccs[pos]) + for other_tree in range(len_tree_list): + if len(common_mccs[other_tree]): + n.child_mccs.append(common_mccs[other_tree]) else: - 
n.child_mccs.append(set.union(*[c.child_mccs[pos] for c in n])) - mcc_intersection = [set.intersection(*[c.child_mccs[pos] for c in tree.root]) for pos in range(len_tree_list)] + n.child_mccs.append(set.union(*[c.child_mccs[other_tree] for c in n])) + mcc_intersection = [set.intersection(*[c.child_mccs[other_tree] for c in tree.root]) for other_tree in range(len_tree_list)] tree.root.mcc = [] - for pos in range(len_tree_list): - if len(mcc_intersection[pos]): - tree.root.mcc.append(list(mcc_intersection[pos])[0]) + for other_tree in range(len_tree_list): + if len(mcc_intersection[other_tree]): + tree.root.mcc.append(list(mcc_intersection[other_tree])[0]) else: tree.root.mcc.append(None) for n in tree.get_nonterminals(order='preorder'): @@ -234,10 +195,10 @@ def assign_all_mccs(tree, len_tree_list, mcc_map, one_mutation=1e-4): continue else: n.mcc = [] - for pos in range(len_tree_list): - if n.up.mcc[pos] in n.child_mccs[pos]: # parent MCC part of children -> that is the MCC - n.mcc.append(n.up.mcc[pos]) - elif len(n.child_mccs[pos])==1: # child is an MCC - n.mcc.append(list(n.child_mccs[pos])[0]) + for other_tree in range(len_tree_list): + if n.up.mcc[other_tree] in n.child_mccs[other_tree]: # parent MCC part of children -> that is the MCC + n.mcc.append(n.up.mcc[other_tree]) + elif len(n.child_mccs[other_tree])==1: # child is an MCC + n.mcc.append(list(n.child_mccs[other_tree])[0]) else: # no unique child MCC and no match with parent -> not part of an MCCs n.mcc.append(None) \ No newline at end of file From b747f15e6d5877436c5638be9cf3780e60ccb9c6 Mon Sep 17 00:00:00 2001 From: Richard Neher Date: Mon, 15 Aug 2022 11:58:05 +0200 Subject: [PATCH 05/12] remember slice for each alignment in concatenated ARG alignemnt --- treetime/arg.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/treetime/arg.py b/treetime/arg.py index 6ab52d00..058063d2 100644 --- a/treetime/arg.py +++ b/treetime/arg.py @@ -73,7 +73,10 @@ def parse_arg(tree_files, 
aln_files, MCC_file, fill_overhangs=True): # read alignments and construct edge modified sequence arrays alignments = [{s.id:s for s in AlignIO.read(aln, 'fasta')} for aln in aln_files] + alignment_ranges = [] + start_val = 0 for aln in alignments: + alignment_ranges.append((start_val, start_val+aln.alignment_length)) for s,seq in aln.items(): seqstr = "".join(seq2array(seq, fill_overhangs=fill_overhangs)) seq.seq = Seq.Seq(seqstr) @@ -93,7 +96,7 @@ def parse_arg(tree_files, aln_files, MCC_file, fill_overhangs=True): masks = get_mask_dict(l, tree_names) return {"MCCs_dict": MCC_dict, "trees_dict":trees_dict, "alignment":MultipleSeqAlignment(aln_combined), - "masks_dict":masks} + "masks_dict":masks, "alignment_ranges":alignment_ranges} def setup_arg(trees_dict, alignments, dates, MCCs_dict, masks_dict, tree_name, gtr='JC69', From faa53d6eb3800cf914a37463487faae88368875e Mon Sep 17 00:00:00 2001 From: anna-parker Date: Mon, 15 Aug 2022 15:04:54 +0200 Subject: [PATCH 06/12] output aln restricted to focal segment in arg, add documentation --- test/test_arg.py | 2 +- treetime/arg.py | 168 +++++++++++++++++++++++++++++++------------ treetime/treeanc.py | 12 +++- treetime/wrappers.py | 16 ++--- 4 files changed, 141 insertions(+), 57 deletions(-) diff --git a/test/test_arg.py b/test/test_arg.py index 51b61404..f8aad8ba 100644 --- a/test/test_arg.py +++ b/test/test_arg.py @@ -69,7 +69,7 @@ def test_setup_arg(): dict_ = arg.parse_arg(tree_nwk_files, aln_files, MCC_file, fill_overhangs=True) ##check if arg is set up correctly on tree_b - masked_tree_b = arg.setup_arg(dict_["trees_dict"], dict_["alignment"], dates, dict_["MCCs_dict"], dict_["masks_dict"], "tree_b", gtr='JC69', + masked_tree_b = arg.setup_arg("tree_b", dict_["trees_dict"], dict_["alignment"], dates, dict_["MCCs_dict"], dict_["masks_dict"], gtr='JC69', verbose=0, fill_overhangs=True, reroot=False, fixed_clock_rate=0.001, alphabet='nuc') node_dict = {} diff --git a/treetime/arg.py b/treetime/arg.py index 
058063d2..2a6e6169 100644 --- a/treetime/arg.py +++ b/treetime/arg.py @@ -4,6 +4,11 @@ import itertools def get_tree_names(tree_nwk_files): + ''' + Input: string list of `.nwk` file PATH locations + + Returns tree names from `.nwk` file names (using TreeKnit standard) + ''' tree_names = [] for file in tree_nwk_files: file_name = file.split("/")[-1].split(".")[0] @@ -14,7 +19,19 @@ def get_tree_names(tree_nwk_files): raise Exception("Error: Tree names must be unique, see TreeKnit output format.") return tree_names -def get_MCC_dict(MCC_file): +def get_MCC_dict(MCC_file): + ''' + Read in MCCs from TreeKnit .json output file + + Returns: + ------- + MCC_dict : dict{frozenset(str), list(str)} + key : frozenset of tree name pairs + item : list of lists of leaf names + two leaves are in the same list if they are in a maximally compatible + clade, meaning in that tree pair there was no recombination in their subclade + + ''' f = open(MCC_file) data = json.load(f) MCC_dict = {} @@ -24,34 +41,76 @@ def get_MCC_dict(MCC_file): return MCC_dict def get_mask_dict(length_segments, tree_names): + """Create alignment masks for tree branches corresponding to which trees + share this branch. 
+ + Parameters + ---------- + length_segments : int list + length of segment in each tree + tree_names : str list + name of each corresponding tree + + Returns + ------- + mask_dict: dictionary + key is a frozenset of tree names, items are boolean masks of length + len(joint alignment), positions in the mask are only 1 if they + correspond to positions in the segments of the trees given + tree_segment_positions : dictionary + start and end position of each segment's position in the combined alignment + """ + #list of start positions (or end positions +1) of each segment pos_list = [0] for l in length_segments: new = pos_list[-1] + l pos_list.append(new) + #create dictionary of start and end position of each segment in alignment + tree_segment_positions = {} + for i in range(len(tree_names)): + tree_segment_positions[tree_names[i]] = (pos_list[i], pos_list[i+1]) + + #mask dictionary mask = {} no_trees = len(tree_names) for r in range(1,(no_trees+1)): + #all combinations of at least one tree combos = itertools.combinations(range(1, (no_trees+1)), r) for comb in combos: new_mask = np.zeros(sum(length_segments)) for c in comb: new_mask[pos_list[c-1]:pos_list[c]] = 1 mask[frozenset([tree_names[c-1] for c in comb])] = new_mask - return mask + return mask, tree_segment_positions def parse_arg(tree_files, aln_files, MCC_file, fill_overhangs=True): -#def parse_arg(tree_file_dir, aln_file_dir, MCC_file, tree_names, fill_overhangs=True): """parse the output of TreeKnit and return a file structure to be further consumed by TreeTime - Args: - tree_files (str): file names of trees - aln_files (str): file names of alignments MUST be in the same order as tree_files - MCC_file (str): name of mcc file - fill_overhangs (bool, optional): fill terminal gaps of alignmens before concatenating. Defaults to True. 
+ Parameters + ---------- + tree_files : str list + file names of trees + aln_files : str list + file names of alignments MUST be in the same order as tree_files + MCC_file : str + name of mcc file + fill_overhangs : bool, optional + fill terminal gaps of alignmens before concatenating. Defaults to True. - Returns: - dict: dictionary containing the two trees, the concatenated alignment, full and segment masks, and the MCCs + Returns + ---------- + dict: dictionary containing + dict["MCCs_dict"]: MCCs dictionary + key is frozenset of tree names, items are a list of leaf name lists + dict["trees_dict"] : tree dictionary + key is the tree name + dict["alignment"] :MultipleSeqAlignment + the concatenated alignment + dict["masks_dict"] : mask dictionary + key is the tree name + dict["seg_pos_dict"] : dictionary + start and end position of each tree's sequence in dict["alignment"] """ from Bio import Phylo, AlignIO, Seq from Bio.Align import MultipleSeqAlignment @@ -73,10 +132,7 @@ def parse_arg(tree_files, aln_files, MCC_file, fill_overhangs=True): # read alignments and construct edge modified sequence arrays alignments = [{s.id:s for s in AlignIO.read(aln, 'fasta')} for aln in aln_files] - alignment_ranges = [] - start_val = 0 for aln in alignments: - alignment_ranges.append((start_val, start_val+aln.alignment_length)) for s,seq in aln.items(): seqstr = "".join(seq2array(seq, fill_overhangs=fill_overhangs)) seq.seq = Seq.Seq(seqstr) @@ -91,50 +147,62 @@ def parse_arg(tree_files, aln_files, MCC_file, fill_overhangs=True): seq.id = leaf aln_combined.append(seq) - # construct masks for the concatenation and the two segments + # construct masks for the concatenated alignment l = [len(a[leaf]) for a in alignments] - masks = get_mask_dict(l, tree_names) + masks, segment_positions = get_mask_dict(l, tree_names) return {"MCCs_dict": MCC_dict, "trees_dict":trees_dict, "alignment":MultipleSeqAlignment(aln_combined), - "masks_dict":masks, "alignment_ranges":alignment_ranges} + 
"masks_dict":masks, "seg_pos_dict":segment_positions} + -def setup_arg(trees_dict, alignments, dates, MCCs_dict, masks_dict, tree_name, gtr='JC69', +def setup_arg(tree_name, trees_dict, aln, dates, MCCs_dict, masks_dict, gtr='JC69', verbose=0, fill_overhangs=True, reroot=True, fixed_clock_rate=None, alphabet='nuc', **kwargs): """construct a TreeTime object with the appropriate masks on each node for branch length optimization with full or segment only alignment. - Args: - T (str, Bio.Phylo.Tree): tree of focal segment - aln (Bio.Align.MultipleSeqAlignment): Concatenated multiple sequence alignment - total_mask (np.array): boolean array that is true for the entire sequence - segment_mask (np.array): boolean array that is true only for the focal segment - dates (dict): sampling dates - MCCs (list): list of MCCs - gtr (str, optional): GTR model. Defaults to 'JC69'. - verbose (int, optional): verbosity. Defaults to 0. - fill_overhangs (bool, optional): treat terminal gap as missing. Defaults to True. - reroot (bool, optional): reroot the tree. Defaults to True. + Parameters: + --------- + tree_name : str + name of focal segment / tree + trees_dict : dictionary{str, Bio.Phylo.Tree} + key is tree_name + aln : Bio.Align.MultipleSeqAlignment): + Concatenated multiple sequence alignment + dates : dict + sampling dates + MCCs_dict : dictionary{frozenset(str), list(str)} + key frozenset of tree_names, item MCC as str of leaf nodes + gtr : str, optional + GTR model. Defaults to 'JC69'. + verbose : int, optional + verbosity. Defaults to 0. + fill_overhangs : bool, optional + treat terminal gap as missing. Defaults to True. + reroot : bool, optional + reroot the tree. Defaults to True. 
Returns: + -------- TreeTime: TreeTime instance """ from treetime import TreeTime T= trees_dict[tree_name] ##desired tree - ##get list of MCCs of all trees with T and the order of these trees + ##get list of MCCs of all other trees with T and the order of these trees MCCs = [] - tree_order = {} - i = 0 + other_tree_order = {} + i =0 for t in trees_dict.keys(): if t != tree_name: - tree_order[i] = t + other_tree_order[i] = t MCCs.append(MCCs_dict[frozenset([tree_name, t])]) i +=1 + num_other_trees = len(other_tree_order.keys()) tt = TreeTime(dates=dates, tree=T, - aln=alignments, gtr=gtr, alphabet=alphabet, verbose=verbose, + aln=aln, gtr=gtr, alphabet=alphabet, verbose=verbose, fill_overhangs=fill_overhangs, keep_node_order=True, compress=False, **kwargs) @@ -144,22 +212,25 @@ def setup_arg(trees_dict, alignments, dates, MCCs_dict, masks_dict, tree_name, g # make a lookup for the MCCs and assign to tree leaf_to_MCC = get_mcc_map(MCCs) - assign_all_mccs(tt.tree, len(MCCs), leaf_to_MCC, tt.one_mutation) + assign_all_mccs(tt.tree, num_other_trees, leaf_to_MCC, tt.one_mutation) # assign masks to branches whenever child and parent are in the same MCC for n in tt.tree.find_clades(): shared = [(n.mcc[other_tree] is not None) and n.up and n.up.mcc[other_tree]==n.mcc[other_tree] - for other_tree in range(len(MCCs))] + for other_tree in range(num_other_trees)] ##use tree_order to convert position in MCC list to tree_names and see which trees share this branch and assign a proper mask - branch_shared = [tree_order[i] for i, x in enumerate(shared) if x] - branch_shared.append(tree_name) + branch_shared = [other_tree_order[i] for i, x in enumerate(shared) if x] + branch_shared.append(tree_name) ##branch is always in tree_name n.mask = masks_dict[frozenset(branch_shared)] return tt def get_mcc_map(MCCs_list): - # make a lookup for the MCCs and assign to trees + """ + Make a lookup for the MCCs and assign to trees. 
+ Each leaf will be assigned a list of mcc clades in the order of `MCCs_list`. + """ leaf_to_MCC = {} for MCCs in MCCs_list: for mi,mcc in enumerate(MCCs): @@ -171,24 +242,31 @@ def get_mcc_map(MCCs_list): return leaf_to_MCC -def assign_all_mccs(tree, len_tree_list, mcc_map, one_mutation=1e-4): +def assign_all_mccs(tree, num_other_trees, mcc_map, one_mutation=1e-4): + ''' + For each node in the tree, if it is part of a mcc clade, assign it to that clade using Fitch. + Do this for every MCC between the focal tree `tree` and another tree. + Additionally assign a minimal branch length to all branches to avoid any numerical issues. + + ''' + #child_mccs is a list of sets. Each set corresponds to which mccs the children of that node are in that segment. for leaf in tree.get_terminals(): - leaf.child_mccs = [set([mcc_map[leaf.name][pos]]) for pos in range(len_tree_list)] + leaf.child_mccs = [set([mcc_map[leaf.name][other_tree]]) for other_tree in range(num_other_trees)] leaf.mcc = mcc_map[leaf.name] leaf.branch_length = max(0.5*one_mutation, leaf.branch_length) # reconstruct MCCs with Fitch algorithm for n in tree.get_nonterminals(order='postorder'): - common_mccs = [set.intersection(*[c.child_mccs[pos] for c in n]) for pos in range(len_tree_list)] + common_mccs = [set.intersection(*[c.child_mccs[other_tree] for c in n]) for other_tree in range(num_other_trees)] n.branch_length = max(0.5*one_mutation, n.branch_length) n.child_mccs = [] - for other_tree in range(len_tree_list): + for other_tree in range(num_other_trees): if len(common_mccs[other_tree]): n.child_mccs.append(common_mccs[other_tree]) else: n.child_mccs.append(set.union(*[c.child_mccs[other_tree] for c in n])) - mcc_intersection = [set.intersection(*[c.child_mccs[other_tree] for c in tree.root]) for other_tree in range(len_tree_list)] + mcc_intersection = [set.intersection(*[c.child_mccs[other_tree] for c in tree.root]) for other_tree in range(num_other_trees)] tree.root.mcc = [] - for other_tree in 
range(len_tree_list): + for other_tree in range(num_other_trees): if len(mcc_intersection[other_tree]): tree.root.mcc.append(list(mcc_intersection[other_tree])[0]) else: @@ -198,7 +276,7 @@ def assign_all_mccs(tree, len_tree_list, mcc_map, one_mutation=1e-4): continue else: n.mcc = [] - for other_tree in range(len_tree_list): + for other_tree in range(num_other_trees): if n.up.mcc[other_tree] in n.child_mccs[other_tree]: # parent MCC part of children -> that is the MCC n.mcc.append(n.up.mcc[other_tree]) elif len(n.child_mccs[other_tree])==1: # child is an MCC diff --git a/treetime/treeanc.py b/treetime/treeanc.py index f2796c3b..d8368bd8 100644 --- a/treetime/treeanc.py +++ b/treetime/treeanc.py @@ -1575,7 +1575,7 @@ def cost_func(sqrt_mu): ############################################################################### ### Utility functions ############################################################################### - def get_reconstructed_alignment(self, reconstruct_tip_states=False): + def get_reconstructed_alignment(self, reconstruct_tip_states=False, aln_slice=None): """ Get the multiple sequence alignment, including reconstructed sequences for the internal nodes. 
@@ -1607,11 +1607,17 @@ def get_reconstructed_alignment(self, reconstruct_tip_states=False): new_aln['positions'] = self.data.nonref_positions new_aln['inferred_const_sites'] = self.data.inferred_const_sites else: - new_aln = MultipleSeqAlignment([SeqRecord(id=n.name, + if aln_slice: + start, end = aln_slice + new_aln = MultipleSeqAlignment([SeqRecord(id=n.name, + seq=Seq(self.sequence(n, reconstructed=reconstruct_tip_states, + as_string=True, compressed=False)[start:end]), description="") + for n in self.tree.find_clades()]) + else: + new_aln = MultipleSeqAlignment([SeqRecord(id=n.name, seq=Seq(self.sequence(n, reconstructed=reconstruct_tip_states, as_string=True, compressed=False)), description="") for n in self.tree.find_clades()]) - return new_aln diff --git a/treetime/wrappers.py b/treetime/wrappers.py index f4e977fc..e8076a2c 100644 --- a/treetime/wrappers.py +++ b/treetime/wrappers.py @@ -162,14 +162,14 @@ def plot_rtt(tt, fname): def export_sequences_and_tree(tt, basename, is_vcf=False, zero_based=False, report_ambiguous=False, timetree=False, confidence=False, - reconstruct_tip_states=False, tree_suffix=''): + reconstruct_tip_states=False, tree_suffix='', aln_slice=None): seq_info = is_vcf or tt.aln if is_vcf: outaln_name = basename + f'ancestral_sequences{tree_suffix}.vcf' write_vcf(tt.get_reconstructed_alignment(reconstruct_tip_states=reconstruct_tip_states), outaln_name) elif tt.aln: outaln_name = basename + f'ancestral_sequences{tree_suffix}.fasta' - AlignIO.write(tt.get_reconstructed_alignment(reconstruct_tip_states=reconstruct_tip_states), outaln_name, 'fasta') + AlignIO.write(tt.get_reconstructed_alignment(reconstruct_tip_states=reconstruct_tip_states, aln_slice=aln_slice), outaln_name, 'fasta') if seq_info: print("\n--- alignment including ancestral nodes saved as \n\t %s\n"%outaln_name) @@ -495,11 +495,11 @@ def arg_time_trees(params): outdir = get_outdir(params, f'_ARG-treetime') gtr = create_gtr(params) - tt = setup_arg(arg_params['trees_dict'], 
arg_params['alignment'], dates, arg_params['MCCs_dict'], arg_params['masks_dict'], - tree_name, gtr=gtr, verbose=params.verbose, fill_overhangs=not params.keep_overhangs, - fixed_clock_rate = params.clock_rate, reroot=root) + tt = setup_arg(tree_name, arg_params['trees_dict'], arg_params['alignment'], dates, arg_params['MCCs_dict'], arg_params['masks_dict'], + gtr=gtr, verbose=params.verbose, fill_overhangs=not params.keep_overhangs, + fixed_clock_rate = params.clock_rate, reroot=root) - run_timetree(tt, params, outdir, tree_suffix=f"_"+tree_name, prune_short=False, method_anc=params.method_anc) + run_timetree(tt, params, outdir, tree_suffix=f"_"+tree_name, prune_short=False, method_anc=params.method_anc, aln_slice=arg_params["seg_pos_dict"][tree_name]) @@ -535,7 +535,7 @@ def timetree(params): return run_timetree(myTree, params, outdir) -def run_timetree(myTree, params, outdir, tree_suffix='', prune_short=True, method_anc='probabilistic'): +def run_timetree(myTree, params, outdir, tree_suffix='', prune_short=True, method_anc='probabilistic', aln_slice=None): ''' this function abstracts the time tree estimation that is used for regular treetime inference and for arg time tree inference. 
@@ -675,7 +675,7 @@ def run_timetree(myTree, params, outdir, tree_suffix='', prune_short=True, metho export_sequences_and_tree(myTree, basename, is_vcf, params.zero_based, timetree=True, confidence=calc_confidence, reconstruct_tip_states=params.reconstruct_tip_states, - tree_suffix=tree_suffix) + tree_suffix=tree_suffix, aln_slice=aln_slice) return 0 From ab6f80f9949048b9e178daba553aee2ce2ae43c1 Mon Sep 17 00:00:00 2001 From: anna-parker Date: Tue, 16 Aug 2022 15:10:18 +0200 Subject: [PATCH 07/12] ensure file name reader is os compatible --- treetime/arg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/treetime/arg.py b/treetime/arg.py index 2a6e6169..ed9f92d8 100644 --- a/treetime/arg.py +++ b/treetime/arg.py @@ -2,6 +2,7 @@ import numpy as np import json import itertools +from os import path def get_tree_names(tree_nwk_files): ''' @@ -11,7 +12,7 @@ def get_tree_names(tree_nwk_files): ''' tree_names = [] for file in tree_nwk_files: - file_name = file.split("/")[-1].split(".")[0] + file_name = path.splitext(path.basename(file))[0] file_name = file_name.replace("_resolved", "").replace("resolved", "") tree_names.append(file_name) if len(set(tree_names)) != len(tree_nwk_files): From 4bc6d1d341f496ae22ca7a641983a6b4ed3b71b5 Mon Sep 17 00:00:00 2001 From: anna-parker Date: Tue, 16 Aug 2022 17:32:06 +0200 Subject: [PATCH 08/12] fix alignment issue --- treetime/arg.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/treetime/arg.py b/treetime/arg.py index ed9f92d8..5d183104 100644 --- a/treetime/arg.py +++ b/treetime/arg.py @@ -1,3 +1,4 @@ +from ctypes import alignment from matplotlib.pyplot import fill import numpy as np import json @@ -132,7 +133,13 @@ def parse_arg(tree_files, aln_files, MCC_file, fill_overhangs=True): all_leaves = set.intersection(*[set([x.name for x in t.get_terminals()]) for (k, t) in trees_dict.items()]) # read alignments and construct edge modified sequence arrays - alignments = [{s.id:s for 
s in AlignIO.read(aln, 'fasta')} for aln in aln_files] + alignments = [] + alignment_lengths = [] + for aln_fname in aln_files: + aln = AlignIO.read(aln_fname, 'fasta') + alignment_lengths.append(aln.get_alignment_length()) + alignments.append({s.id:s for s in aln}) + for aln in alignments: for s,seq in aln.items(): seqstr = "".join(seq2array(seq, fill_overhangs=fill_overhangs)) @@ -141,7 +148,7 @@ def parse_arg(tree_files, aln_files, MCC_file, fill_overhangs=True): # construct concatenated alignment aln_combined = [] for leaf in all_leaves: - concat_seq = alignments[1][leaf] + concat_seq = alignments[0][leaf] for a in range(1, len(alignments)): concat_seq += alignments[a][leaf] seq = concat_seq @@ -149,8 +156,7 @@ def parse_arg(tree_files, aln_files, MCC_file, fill_overhangs=True): aln_combined.append(seq) # construct masks for the concatenated alignment - l = [len(a[leaf]) for a in alignments] - masks, segment_positions = get_mask_dict(l, tree_names) + masks, segment_positions = get_mask_dict(alignment_lengths, tree_names) return {"MCCs_dict": MCC_dict, "trees_dict":trees_dict, "alignment":MultipleSeqAlignment(aln_combined), "masks_dict":masks, "seg_pos_dict":segment_positions} @@ -190,7 +196,6 @@ def setup_arg(tree_name, trees_dict, aln, dates, MCCs_dict, masks_dict, gtr='JC6 from treetime import TreeTime T= trees_dict[tree_name] ##desired tree - ##get list of MCCs of all other trees with T and the order of these trees MCCs = [] other_tree_order = {} From 0e84e2217e1c01f27e60877ff6d26b2362fa824c Mon Sep 17 00:00:00 2001 From: anna-parker Date: Tue, 16 Aug 2022 18:07:51 +0200 Subject: [PATCH 09/12] shuffle mcc numbering --- treetime/arg.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/treetime/arg.py b/treetime/arg.py index 5d183104..55bc9cab 100644 --- a/treetime/arg.py +++ b/treetime/arg.py @@ -4,6 +4,7 @@ import json import itertools from os import path +import random def get_tree_names(tree_nwk_files): ''' @@ -232,19 +233,23 @@ 
def setup_arg(tree_name, trees_dict, aln, dates, MCCs_dict, masks_dict, gtr='JC6 return tt -def get_mcc_map(MCCs_list): +def get_mcc_map(MCCs_list, shuffle=False): """ Make a lookup for the MCCs and assign to trees. Each leaf will be assigned a list of mcc clades in the order of `MCCs_list`. """ leaf_to_MCC = {} for MCCs in MCCs_list: + mcc_index = list(range(len(MCCs))) + if shuffle: + random.seed(987) + random.shuffle(mcc_index) for mi,mcc in enumerate(MCCs): for leaf in mcc: if leaf not in leaf_to_MCC: - leaf_to_MCC[leaf] = [mi] + leaf_to_MCC[leaf] = [mcc_index[mi]] else: - leaf_to_MCC[leaf].append(mi) + leaf_to_MCC[leaf].append(mcc_index[mi]) return leaf_to_MCC From 103c876b105a0dd9347038d343b9f66cb50135c6 Mon Sep 17 00:00:00 2001 From: anna-parker Date: Wed, 17 Aug 2022 11:55:20 +0200 Subject: [PATCH 10/12] write mcc list to nexus file as string to avoid parse errors --- treetime/wrappers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/treetime/wrappers.py b/treetime/wrappers.py index e8076a2c..8fff01fe 100644 --- a/treetime/wrappers.py +++ b/treetime/wrappers.py @@ -217,10 +217,12 @@ def export_sequences_and_tree(tt, basename, is_vcf=False, zero_based=False, if tt.gtr.ambiguous not in [a,d]])+'"' else: if report_ambiguous: - n.comment= '&mutations="' + ','.join([a+str(pos + offset)+d for (a,pos, d) in n.mutations if n.mask[pos]>0])+f'",mcc="{n.mcc}"' + n.comment= '&mutations="' + ','.join([a+str(pos + offset)+d for (a,pos, d) in n.mutations if n.mask[pos]>0])\ + +f'",mcc="{",".join([str(x) for x in n.mcc])}"' else: n.comment= '&mutations="' + ','.join([a+str(pos + offset)+d for (a,pos, d) in n.mutations - if tt.gtr.ambiguous not in [a,d] and n.mask[pos]>0])+f'",mcc="{n.mcc}"' + if tt.gtr.ambiguous not in [a,d] and n.mask[pos]>0])\ + +f'",mcc="{",".join([str(x) for x in n.mcc])}"' for (a, pos, d) in n.mutations: if tt.gtr.ambiguous not in [a,d] or report_ambiguous: From 96c800019986b3fb73469ab316d5d9b337396d8d Mon Sep 17 00:00:00 
2001 From: anna-parker Date: Tue, 30 Aug 2022 14:28:09 +0200 Subject: [PATCH 11/12] add documentation on arg command in treetime --- docs/source/tutorials/arg.rst | 45 +++++++++++++++++++++++++++++++++++ treetime/argument_parser.py | 7 +++--- 2 files changed, 49 insertions(+), 3 deletions(-) create mode 100644 docs/source/tutorials/arg.rst diff --git a/docs/source/tutorials/arg.rst b/docs/source/tutorials/arg.rst new file mode 100644 index 00000000..14a15526 --- /dev/null +++ b/docs/source/tutorials/arg.rst @@ -0,0 +1,45 @@ + +Using Recombination Event Knowledge to Improve Time Tree Inference +------------------------------------------------------------------ + +Although relatively uncommon, recombination can be a driver of pathogen evolution. However, recombining segments are often not used together in phylogenetic inference due to computational challenges. +Because of this, typically, segments of the genome with little to no recombination are used to infer the pathogen phylogeny, leading to a loss of information. +As recombination is an uncommon event most segments will have portions of their phylogenies that show a great deal of overlap. Segments share evolutionary history in these areas of overlap and this knowledge can +be used to improve divergence time estimates in TreeTime. + +To infer branch length TreeTime makes the assumption that the number of mutations on a branch of length :math:`\tau` is Poisson distributed + +.. math:: + n_{mut} \sim Pois(\mu \tau). + +Where :math:`\mu` is the mutation rate. +The variance and mean number of expected mutations on a branch of length :math:`\tau` is :math:`\mu \tau`. +If :math:`\mu` is known this relation can be used to estimate the branch length given the number of seen mutations, + +.. math:: + \tau^{infer} = \frac{n_{mut}}{\mu}, + +this estimator has expectation :math:`\tau` and variance :math:`\frac{\tau}{\mu}`. +When this is repeated :math:`L` times (i.e.
for a sequence of :math:`L` nucleotides, each with mutation rate :math:`\mu`), +the average number of mutations is normally distributed with mean :math:`\mu \tau` and standard error :math:`\sqrt{\frac{\mu \tau}{L}}`. +Assuming we know that a branch is shared between two segments we can use the alignment of both segments on this branch to estimate divergence time of this branch, decreasing the standard error. + +`TreeKnit `_ is a package that can infer recombination events from tree topologies. It returns lists of leaves that are connected by shared branches in pairs of trees. +These leaves can be used to determine so called maximally compatible clades (MCCs), or clades where topology is shared across trees. +If desired TreeKnit additionally returns trees that have been resolved according to each other. + +This output can be used in TreeTime to improve the inference of time trees and in turn improve the ancestral sequence reconstruction and the clock tree inference. +The ``treetime arg`` command uses input trees and their corresponding alignments to infer time trees. It is assumed the list of MCCs are in json format as described in TreeKnit. +For each tree, the list of maximally compatible clades with every other tree is used to determine if internal nodes are part of a MCC with another tree and if they are, which MCC they belong to. +This is done using the Fitch algorithm (function: ``assign_all_mccs``). If a node and it's parent both belong to the same MCC then the branch between them is shared. +For example if a branch is shared between trees of segments A and B then the alignment of both segment A and B can be used to infer the divergence time of the branch, +leading to more accurate branch length estimates than if only the alignment of segment A was used to infer the divergence time of that branch. + +In the test folder there is an example of a standard TreeKnit output for three trees.
TreeTime expects recombination information to be in `TreeKnit output format `_. This can be used to run the ``treetime arg`` command: + +.. code:: bash + + treetime arg --trees arg/TreeKnit/tree_a_resolved.nwk arg/TreeKnit/tree_b_resolved.nwk arg/TreeKnit/tree_c_resolved.nwk --alignments arg/TreeKnit/aln_a.fasta arg/TreeKnit/aln_b.fasta arg/TreeKnit/aln_c.fasta --mccs arg/TreeKnit/MCCs.json --dates arg/TreeKnit/metadata.csv --clock-rate 0.0028 --outdir time_tree_arg_results + +For each tree treetime will output ancestral sequence reconstructions, dates of the tree nodes, as well as time tree and divergence trees for each input tree using information from other trees for shared branches. +The output will be written to the folder ``time_tree_arg_results``. diff --git a/treetime/argument_parser.py b/treetime/argument_parser.py index 6734f792..e3f7cf66 100644 --- a/treetime/argument_parser.py +++ b/treetime/argument_parser.py @@ -305,9 +305,10 @@ def toplevel(params): ## ARG arg_parser = subparsers.add_parser('arg', - description="Calculates the root-to-tip regression and quantifies the 'clock-i-ness' of the tree. " - "It will reroot the tree to maximize the clock-like " - "signal and recalculate branch length unless run with --keep_root.") + description="Command to use recombination event information to better estimate time trees, clock rates and ancestral reconstruction of sequences." 
+ "Given trees, their alignments and a list of maximally compatible clades (shared topological" + "structures where no recombination events have occurred) this command will perform standard treetime inference" + "on all trees, using alignment information from other trees for inference on shared branches (branches in MCCs).") arg_parser.add_argument('--trees', nargs='+', required=True, type=str) arg_parser.add_argument('--alignments', nargs='+', required=True, type=str) arg_parser.add_argument('--mccs', required=True, type=str) From 401fa28c748dd9b609983d93a5ee0ab152b74ec8 Mon Sep 17 00:00:00 2001 From: anna-parker Date: Tue, 30 Aug 2022 14:42:32 +0200 Subject: [PATCH 12/12] small changes to docs --- docs/source/tutorials.rst | 3 ++- docs/source/tutorials/arg.rst | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst index a30eef3e..07a6fcf2 100644 --- a/docs/source/tutorials.rst +++ b/docs/source/tutorials.rst @@ -28,4 +28,5 @@ These tutorials use example data provided in the github-repository tutorials/ancestral tutorials/clock tutorials/mugration - tutorials/homoplasy \ No newline at end of file + tutorials/homoplasy + tutorials/arg \ No newline at end of file diff --git a/docs/source/tutorials/arg.rst b/docs/source/tutorials/arg.rst index 14a15526..24d23306 100644 --- a/docs/source/tutorials/arg.rst +++ b/docs/source/tutorials/arg.rst @@ -3,7 +3,7 @@ Using Recombination Event Knowledge to Improve Time Tree Inference ------------------------------------------------------------------ Although relatively uncommon, recombination can be a driver of pathogen evolution. However, recombining segments are often not used together in phylogenetic inference due to computational challenges. -Because of this, typically, segments of the genome with little to no recombination are used to infer the pathogen phylogeny, leading to a loss of information. 
+Because of this, typically, segments of the genome with little to no recombination are used individually to infer the pathogen phylogeny, leading to a loss of information. As recombination is an uncommon event most segments will have portions of their phylogenies that show a great deal of overlap. Segments share evolutionary history in these areas of overlap and this knowledge can be used to improve divergence time estimates in TreeTime. @@ -29,10 +29,11 @@ These leaves can be used to determine so called maximally compatible clades (MCC If desired TreeKnit additionally returns trees that have been resolved according to each other. This output can be used in TreeTime to improve the inference of time trees and in turn improve the ancestral sequence reconstruction and the clock tree inference. -The ``treetime arg`` command uses input trees and their corresponding alignments to infer time trees. It is assumed the list of MCCs are in json format as described in TreeKnit. +The ``treetime arg`` command uses input trees and their corresponding alignments to infer time trees. It is assumed that the list of MCCs is in JSON format as described in `TreeKnit `_ . For each tree, the list of maximally compatible clades with every other tree is used to determine if internal nodes are part of a MCC with another tree and if they are, which MCC they belong to. -This is done using the Fitch algorithm (function: ``assign_all_mccs``). If a node and it's parent both belong to the same MCC then the branch between them is shared. -For example if a branch is shared between trees of segments A and B then the alignment of both segment A and B can be used to infer the divergence time of the branch, +This is done using the `Fitch algorithm `_ (function: ``assign_all_mccs``). If a node and its parent both belong to the same MCC, then the branch between them is shared. +Take for example three trees of segments A, B and C.
If a branch is shared between trees of segments A and B then the alignments of both segments A and B can be used to infer the divergence time of the branch +(the alignment of segment C is masked at this position); if the branch is only in tree A the alignments of segments B and C will be masked. If a branch is shared between the segments A, B and C then alignments A, B and C can be used, leading to more accurate branch length estimates than if only the alignment of segment A was used to infer the divergence time of that branch. In the test folder there is an example of a standard TreeKnit output for three trees. TreeTime expects recombination information to be in `TreeKnit output format `_. This can be used to run the ``treetime arg`` command: