BioPython Phylo: collapsing the nodes of a tree that appear in a list and renaming the resulting collapsed node

59 Views Asked by At

I have a phylogenetic tree in Newick format, and I would like to collapse every group of species that appears on a specific list into a single node and rename that node according to a renaming table.

This is the tree:

((((A:0.1, B:0.2):0.3, C:0.3):0.15, (D:0.3, (E:0.1, (F:0.15, (G:0.1, H:0.1):0.1):0.1):0.1):0.1):0.15, I:0.2);

This is the renaming table:

| species | clade_renaming |
|------------|----------------|
| A, B       | X              |
| F, G, H    | Y              |

Expected result:

(((X:0.3, C:0.3):0.15, (D:0.3, (E:0.1, Y:0.1):0.1):0.1):0.15, I:0.2);

This is the current code that can collapse nodes:

from Bio import Phylo
import io

# Example tree in Newick notation: leaves A through I, each with a branch length.
tree_structure = "((((A:0.1, B:0.2):0.3, C:0.3):0.15, (D:0.3, (E:0.1, (F:0.15, (G:0.1, H:0.1):0.1):0.1):0.1):0.1):0.15, I:0.2);"
# Parse the string through an in-memory text stream, reading it as 'newick'.
tree = Phylo.read(io.StringIO(tree_structure), 'newick')

# Names of the leaves that should be collapsed.
nodes_to_collapse = ["A", "B", "F", "G", "H"]

def collapse_nodes(tree, nodes_to_collapse):
    """Collapse every tree element whose name is listed in ``nodes_to_collapse``.

    Args:
        tree: Object providing ``find_elements(target=..., order=...)`` and
            ``collapse(node)``; in this script it is the tree returned by
            ``Phylo.read``.
        nodes_to_collapse: Container of names. Membership is tested with
            ``in``, so any container supporting that operator works.

    Returns:
        None. ``tree`` is modified in place through ``tree.collapse``.
    """
    # Snapshot the matches before touching the tree. ``collapse`` changes the
    # tree structure, and if the search is evaluated lazily, mutating the tree
    # while it is still being consumed can make the traversal skip elements.
    matches = list(
        tree.find_elements(
            target=lambda element: element.name in nodes_to_collapse,
            order="postorder",
        )
    )
    for node in matches:
        tree.collapse(node)

# Collapse the listed names (the tree is modified in place), then plot the result.
collapse_nodes(tree, nodes_to_collapse)
Phylo.draw(tree)
1

There is 1 best solution below

0
Umar On BEST ANSWER

I created a function to collapse nodes and remove their direct children based on the renaming table. Within this function, I iterated through the rename table, identified the common ancestor for each group of species to be renamed, updated its name, and removed its direct children.

from Bio import Phylo
import io

# Example tree in Newick notation: leaves A through I, each with a branch length.
tree_structure = "((((A:0.1, B:0.2):0.3, C:0.3):0.15, (D:0.3, (E:0.1, (F:0.15, (G:0.1, H:0.1):0.1):0.1):0.1):0.1):0.15, I:0.2);"
# Renaming table: key = comma-separated species names, value = name for the collapsed clade.
rename_table = {"A, B": "X", "F, G, H": "Y"}

# Parse the string through an in-memory text stream, reading it as 'newick'.
tree = Phylo.read(io.StringIO(tree_structure), 'newick')

# Function to collapse specified nodes and remove their direct children
def collapse_and_remove_children(tree, rename_table):
    """Turn each listed species group into a single renamed node.

    For every entry of ``rename_table`` the common ancestor of the listed
    names is looked up, renamed, and stripped of its children. The ancestor's
    own branch length is not touched.

    Note that *all* children of the common ancestor are dropped, not only the
    listed species, so a group should contain every leaf below its ancestor.

    Args:
        tree: Object providing ``common_ancestor(names)``; in this script it
            is the tree returned by ``Phylo.read``.
        rename_table: Mapping from a comma-separated string of species names
            (whitespace around each name is ignored) to the new node name.

    Returns:
        None. ``tree`` is modified in place.
    """
    for species_csv, label in rename_table.items():
        # "A, B" -> ["A", "B"]
        members = list(map(str.strip, species_csv.split(',')))
        ancestor = tree.common_ancestor(members)
        ancestor.name = label
        # An empty child list makes the ancestor a leaf.
        ancestor.clades = []

# Rename the common ancestor of each group in rename_table and drop its
# children; the tree is modified in place.
collapse_and_remove_children(tree, rename_table)

# Plot the collapsed tree
Phylo.draw(tree)

enter image description here

Hi, after some more experimenting, I found a suitable solution, shown below:

from Bio import Phylo
import io

# Input tree (Newick notation, leaves A through I) and renaming table
# (key = comma-separated species names, value = name for the collapsed clade)
tree_structure = "((((A:0.1, B:0.2):0.3, C:0.3):0.15, (D:0.3, (E:0.1, (F:0.15, (G:0.1, H:0.1):0.1):0.1):0.1):0.1):0.15, I:0.2);"
rename_table = {"A, B": "X", "F, G, H": "Y"}

# Read the tree from an in-memory text stream, parsing it as 'newick'
tree = Phylo.read(io.StringIO(tree_structure), 'newick')

# Function to collapse and rename nodes
def collapse_and_rename(tree, rename_table):
    """Rename the common ancestor of each species group and rewrite branch lengths.

    For every entry of ``rename_table`` the common ancestor of the listed
    names is looked up and renamed. Its branch length is then *replaced* by
    the sum of its direct children's branch lengths, and that same sum is
    subtracted from every child whose branch length is truthy. No clade is
    removed by this function.

    Args:
        tree: Object providing ``common_ancestor(names)``; in this script it
            is the tree returned by ``Phylo.read``.
        rename_table: Mapping from a comma-separated string of species names
            (whitespace around each name is ignored) to the new node name.

    Returns:
        None. ``tree`` is modified in place.
    """
    for species_csv, label in rename_table.items():
        members = [part.strip() for part in species_csv.split(',')]
        ancestor = tree.common_ancestor(members)

        # Children whose branch length is None contribute nothing to the sum.
        known_lengths = (
            kid.branch_length
            for kid in ancestor.clades
            if kid.branch_length is not None
        )
        children_total = sum(known_lengths)

        ancestor.name = label

        # Children with a length of None or 0 are left untouched; every other
        # child has the children's total subtracted from its own length.
        for kid in ancestor.clades:
            if not kid.branch_length:
                continue
            kid.branch_length -= children_total

        # This overwrites whatever branch length the ancestor had before.
        ancestor.branch_length = children_total



# Rename the common ancestor of each group and rewrite branch lengths in place.
# Nothing is removed from the tree yet; that is done by collapse_nodes below.
collapse_and_rename(tree, rename_table)

# Function to collapse nodes and update branch lengths
def collapse_nodes(tree, nodes_to_collapse):
    """Collapse every tree element whose name is listed in ``nodes_to_collapse``.

    Args:
        tree: Object providing ``find_elements(target=..., order=...)`` and
            ``collapse(node)``; in this script it is the tree returned by
            ``Phylo.read``.
        nodes_to_collapse: Container of names. Membership is tested with
            ``in``, so any container supporting that operator works.

    Returns:
        None. ``tree`` is modified in place through ``tree.collapse``.
    """
    # Collect all matches up front. ``collapse`` changes the tree structure,
    # and if the search is evaluated lazily, mutating the tree while it is
    # still being consumed can make the traversal skip elements.
    matches = list(
        tree.find_elements(
            target=lambda element: element.name in nodes_to_collapse,
            order="postorder",
        )
    )
    for node in matches:
        tree.collapse(node)
# Names of the leaves that should be collapsed.
nodes_to_collapse = ["A", "B", "F", "G", "H"]
# Collapse the listed names; the tree is modified in place
collapse_nodes(tree, nodes_to_collapse)

# Plot the resulting tree
Phylo.draw(tree)

enter image description here