Skip to content

Commit cb4e882

Browse files
authored
Merge pull request #129 from dengzq1234/main
update
2 parents a3e9591 + de1c2c1 commit cb4e882

File tree

5 files changed

+132
-80
lines changed

5 files changed

+132
-80
lines changed

treeprofiler/layouts/staple_layouts.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -287,17 +287,20 @@ def __init__(self, name=None, column=0, width=70, height=None,
287287

288288
def set_tree_style(self, tree, tree_style):
289289
super().set_tree_style(tree, tree_style)
290-
290+
first_key = min(self.color_range.keys())
291+
last_key = max(self.color_range.keys())
292+
middle_key = sorted(self.color_range.keys())[len(self.color_range) // 2]
293+
291294
text = TextFace(self.prop, padding_x=self.padding_x, width=self.width, rotation=315)
292295
tree_style.aligned_panel_header.add_face(text, column=self.column)
293296
if self.legend:
294297
tree_style.add_legend(title=self.prop,
295298
variable='continuous',
296299
value_range=self.value_range ,
297300
color_range=[
298-
self.color_range.get(20),
299-
self.color_range.get(10),
300-
self.color_range.get(1),
301+
self.color_range.get(last_key),
302+
self.color_range.get(middle_key),
303+
self.color_range.get(first_key),
301304
]
302305
)
303306
def set_node_style(self, node):
@@ -397,8 +400,8 @@ def set_tree_style(self, tree, tree_style):
397400
variable='continuous',
398401
value_range=[self.minval, self.maxval],
399402
color_range=[
400-
self.color_dict[20],
401-
self.color_dict[10],
403+
self.color_dict[-1],
404+
self.color_dict[len(self.color_dict)//2],
402405
self.color_dict[1]
403406
]
404407
)

treeprofiler/src/utils.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from __future__ import annotations
2-
from treeprofiler.src import b64pickle
2+
from treeprofiler.src import ete_format
33
from ete4.parser.newick import NewickError
44
from ete4.core.operations import remove
55
from ete4 import Tree, PhyloTree
@@ -160,7 +160,7 @@ def get_consensus_seq(matrix_string: Path | str, threshold=0.7) -> SeqRecord:
160160
consensus = summary.dumb_consensus(threshold, "-")
161161
return consensus
162162

163-
def counter2ratio(node, prop, minimum=0.05):
163+
def counter2ratio(node, prop, minimum=0.01):
164164
counter_separator = '||'
165165
items_separator = '--'
166166
count_missing = True
@@ -188,11 +188,11 @@ def counter2ratio(node, prop, minimum=0.05):
188188
ratio = 0
189189

190190
if ratio < minimum and ratio != 0: # show minimum color for too low
191-
ratio = 0.05
191+
ratio = minimum
192192

193193
return ratio
194194

195-
def categorical2ratio(node, prop, all_values, minimum=0.05):
195+
def categorical2ratio(node, prop, all_values, minimum=0.01):
196196
counter_separator = '||'
197197
items_separator = '--'
198198
count_missing = True
@@ -210,7 +210,7 @@ def categorical2ratio(node, prop, all_values, minimum=0.05):
210210
positive = 0
211211
ratio = positive / total
212212
if ratio < minimum and ratio != 0: # show minimum color for too low
213-
ratio = 0.05
213+
ratio = minimum
214214
ratios.append(ratio)
215215

216216
return ratios
@@ -240,7 +240,7 @@ def validate_tree(tree_path, input_type, internal_parser=None):
240240
try:
241241
with open(tree_path, 'r') as f:
242242
file_content = f.read()
243-
tree = b64pickle.loads(file_content, encoder='pickle', unpack=False)
243+
tree = ete_format.loads(file_content, encoder='pickle', unpack=False)
244244
eteformat_flag = True
245245
except Exception as e:
246246
if input_type == 'ete':

treeprofiler/tree_annotate.py

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from treeprofiler.src import utils
2525
from treeprofiler.src.phylosignal import run_acr_discrete, run_acr_continuous, run_delta
2626
from treeprofiler.src.ls import run_ls
27-
from treeprofiler.src import b64pickle
27+
from treeprofiler.src import ete_format
2828

2929
from multiprocessing import Pool
3030

@@ -112,7 +112,7 @@ def populate_annotate_args(parser):
112112
# help=("<kingdom|phylum|class|order|family|genus|species|subspecies> "
113113
# "reference tree from taxonomic database"))
114114
add('--taxadb', type=str.upper,
115-
choices=['NCBI', 'GTDB', 'customdb'],
115+
choices=['NCBI', 'GTDB', 'MOTUS', 'customdb'],
116116
help="<NCBI|GTDB> for taxonomic annotation or fetch taxatree")
117117
add('--gtdb-version', type=int,
118118
choices=[95, 202, 207, 214, 220],
@@ -648,6 +648,17 @@ def run_tree_annotate(tree, input_annotated_tree=False,
648648
else:
649649
logger.info("No specific version or dump file provided; using latest GTDB data...")
650650
GTDBTaxa().update_taxonomy_database()
651+
elif taxadb == 'MOTUS':
652+
if gtdb_version and taxa_dump:
653+
logger.error('Please specify either GTDB version or taxa dump file, not both.')
654+
sys.exit(1)
655+
if taxa_dump:
656+
logger.info(f"Loading GTDB database dump file {taxa_dump}...")
657+
GTDBTaxa().update_taxonomy_database(taxa_dump)
658+
else:
659+
logger.info("No specific version or dump file provided; using latest GTDB data...")
660+
motus_dump = download_motus_dump()
661+
GTDBTaxa().update_taxonomy_database(motus_dump)
651662
elif taxadb == 'NCBI':
652663
if taxa_dump:
653664
logger.info(f"Loading NCBI database dump file {taxa_dump}...")
@@ -924,7 +935,7 @@ def run(args):
924935

925936
### out ete
926937
with open(os.path.join(args.outdir, base+'_annotated.ete'), 'w') as f:
927-
f.write(b64pickle.dumps(annotated_tree, encoder='pickle', pack=False))
938+
f.write(ete_format.dumps(annotated_tree, encoder='pickle', pack=False))
928939

929940
### out tsv
930941
prop_keys = list(prop2type.keys())
@@ -1780,7 +1791,7 @@ def merge_dictionaries(dict_ranks, dict_names):
17801791
return merged_dict
17811792

17821793

1783-
if db == "GTDB":
1794+
if db == "GTDB" or "MOTUS":
17841795
gtdb = GTDBTaxa()
17851796
tree.set_species_naming_function(return_spcode_gtdb)
17861797
gtdb.annotate_tree(tree, taxid_attr="species", ignore_unclassified=ignore_unclassified)
@@ -1840,28 +1851,27 @@ def merge_dictionaries(dict_ranks, dict_names):
18401851

18411852
return tree, rank2values
18421853

1843-
# def annotate_evol_events(tree, sp_delimiter='.', sp_field=0):
1844-
# def return_spcode(leaf):
1845-
# try:
1846-
# return leaf.name.split(sp_delimiter)[sp_field]
1847-
# except (IndexError, ValueError):
1848-
# return leaf.name
1849-
1850-
# tree.set_species_naming_function(return_spcode)
1851-
1852-
# node2species = tree.get_cached_content('species')
1853-
# for n in tree.traverse():
1854-
# n.props['species'] = node2species[n]
1855-
# if len(n.children) == 2:
1856-
# dup_sp = node2species[n.children[0]] & node2species[n.children[1]]
1857-
# if dup_sp:
1858-
# n.props['evoltype'] = 'D'
1859-
# n.props['dup_sp'] = ','.join(dup_sp)
1860-
# n.props['dup_percent'] = round(len(dup_sp)/len(node2species[n]), 3) * 100
1861-
# else:
1862-
# n.props['evoltype'] = 'S'
1863-
# n.del_prop('_speciesFunction')
1864-
# return tree
1854+
def download_motus_dump():
    """Fetch the motus taxonomy dump archive if it is missing or outdated.

    The archive is stored as ``./motus_latest_dump.tar.gz`` relative to the
    current working directory. When a local copy already exists, its MD5
    digest is compared with the checksum published next to the archive
    (``<url>.md5``); the archive is downloaded again only when they differ
    or when the remote checksum cannot be obtained.

    Returns:
        str: Path of the local archive.

    Raises:
        requests.HTTPError: If the archive download returns an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    from hashlib import md5
    import requests

    url = "https://github.com/dengzq1234/ete-data/raw/refs/heads/main/motus_taxonomy/motus_latest_dump.tar.gz"
    fname = './motus_latest_dump.tar.gz'
    timeout = 60  # seconds; without it a stalled connection would block forever

    def fetch_dump():
        # Request and validate BEFORE opening the target file, so a failed
        # download never truncates an existing local copy and an HTTP error
        # page is never saved as if it were the tarball.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        with open(fname, 'wb') as f:
            f.write(response.content)

    if not os.path.exists(fname):
        print(f'Downloading {fname} from {url} ...')
        fetch_dump()
        return fname

    with open(fname, 'rb') as f:
        md5_local = md5(f.read()).hexdigest()

    # A missing or empty checksum file is treated as "unknown", which forces
    # a refresh instead of raising IndexError on an empty split().
    md5_response = requests.get(url + '.md5', timeout=timeout)
    md5_fields = md5_response.text.split() if md5_response.ok else []
    md5_remote = md5_fields[0] if md5_fields else None

    if md5_local != md5_remote:
        print(f'Updating {fname} from {url} ...')
        fetch_dump()
    else:
        print(f'File {fname} is already up-to-date with {url} .')
    return fname
18651875

18661876
def annotate_evol_events(tree, sos_thr=0.0, sp_delimiter='.', sp_field=0):
18671877
def return_spcode(leaf):

0 commit comments

Comments
 (0)