I have built a tool that takes input sequences, maps them onto a reference sequence, and highlights the matched regions. You can then save a Word document, which contains a table of each sequence name and its sequence.
What I want is to
- Highlight the sequences in specific colours.
- Highlight those sequences in the reference sequence.
Below is the function that creates the Word document.
def _split_reference_by_colour(reference_sequence, coloured_regions):
    """Split the reference into consecutive ``(text, colour)`` segments.

    ``coloured_regions`` is an iterable of ``(region, colour)`` pairs. Each
    region is located with ``str.find`` (first occurrence only); regions that
    are empty or not present in the reference are skipped. When regions
    overlap, the one listed later wins. Unmatched stretches get colour ``None``.
    """
    colours = [None] * len(reference_sequence)
    for region, colour in coloured_regions:
        if not region:
            continue
        start = reference_sequence.find(region)
        if start < 0:
            continue
        colours[start:start + len(region)] = [colour] * len(region)

    # Group neighbouring bases that share a colour so each group becomes one run.
    grouped = []
    for base, colour in zip(reference_sequence, colours):
        if grouped and grouped[-1][1] == colour:
            grouped[-1][0].append(base)
        else:
            grouped.append(([base], colour))
    return [("".join(bases), colour) for bases, colour in grouped]


def _add_shaded_run(paragraph, text, color_code=None):
    """Append ``text`` to ``paragraph`` as a run, shaded when a colour is given."""
    run = paragraph.add_run(text)
    if color_code:
        shading = OxmlElement('w:shd')
        shading.set(qn('w:val'), 'clear')
        shading.set(qn('w:color'), 'auto')
        shading.set(qn('w:fill'), color_code)
        # Shading is a run *property*, so it belongs inside <w:rPr>.
        run._r.get_or_add_rPr().append(shading)
    return run


def create_word_document(output_text, reference_sequence):
    """Write the reference sequence and a primer table to a Word document.

    Args:
        output_text: Object exposing ``highlighted_sequences``, a mapping of
            sequence name to the matched region of the reference.
        reference_sequence: The reference sequence as a string.

    The document contains the reference sequence with every matched region
    shaded in that primer's colour, followed by a two-column table (name,
    sequence). Table sequences are passed through
    ``reverse_complement_if_needed``. Directly after the B3 row, FIP
    (F1c-F2) and BIP (B1c-B2) rows are added when both of their components
    are available. The file is saved as ``LAMP Primer mapping.docx`` (a
    relative path).
    """
    document = Document()
    sequences = output_text.highlighted_sequences

    # Fill colours (hex RRGGBB) used for shading, keyed by sequence name.
    highlight_colors = {
        "F3": "FF0000",   # Red for F3
        "B3": "0000FF",   # Blue for B3
        "F2": "FFC0CB",   # Pink for F2
        "F1c": "0000FF",  # Blue for F1c
        "B2": "FFFF00",   # Yellow for B2
        "B1c": "00008B",  # Dark blue for B1c
        "LF": "FFA500",   # Orange for LF
        "LB": "800080",   # Purple for LB
        # Change colors as needed
    }

    # Reference sequence, one run per coloured / uncoloured stretch.
    # "Body Text" is the style's name; "BodyText" is only its id, and lookup
    # by id is deprecated in python-docx.
    document.add_heading("Reference Sequence", level=1)
    reference_paragraph = document.add_paragraph(style='Body Text')
    coloured_regions = [
        (region, highlight_colors[name])
        for name, region in sequences.items()
        if name in highlight_colors
    ]
    for text, color_code in _split_reference_by_colour(reference_sequence, coloured_regions):
        _add_shaded_run(reference_paragraph, text, color_code)

    # Table of sequences
    table = document.add_table(rows=1, cols=2, style='Table Grid')
    table.autofit = True
    table.cell(0, 0).text = "Sequence Name"
    table.cell(0, 1).text = "Highlighted Sequence"

    def add_row(label, sequence_text, color_code=None):
        cells = table.add_row().cells
        cells[0].text = label
        cells[1].text = sequence_text
        if color_code is not None:
            apply_shading_to_cell(cells[1], sequence_text, color_code)

    def primer_for(name):
        # Returns None instead of raising KeyError when a primer was not mapped.
        region = sequences.get(name)
        if region is None:
            return None
        return reverse_complement_if_needed(name, region)

    for seq_name, highlighted_sequence in sequences.items():
        add_row(
            f"{seq_name} Sequence",
            reverse_complement_if_needed(seq_name, highlighted_sequence),
            highlight_colors.get(seq_name),
        )

        # Add FIP and BIP sequences after B3
        if seq_name == "B3":
            for label, first_name, second_name in (
                ("FIP Sequence", "F1c", "F2"),
                ("BIP Sequence", "B1c", "B2"),
            ):
                # Every component goes through the same orientation rule as
                # its own table row, so B2 inside BIP matches the B2 row.
                first, second = primer_for(first_name), primer_for(second_name)
                if first is None or second is None:
                    continue
                add_row(label, f"{first}-{second}", highlight_colors.get(first_name))

    # Save the document
    document.save("LAMP Primer mapping.docx")
def apply_shading_to_cell(cell, text, color_code):
    """Replace the cell's content with ``text`` shaded in ``color_code``.

    Args:
        cell: python-docx table cell to overwrite.
        text: Text to place in the cell.
        color_code: Fill colour as a hex ``RRGGBB`` string.
    """
    # Clear existing text, then add the new text as a single run.
    cell.text = ''
    paragraph = cell.paragraphs[0]
    run = paragraph.add_run(text)

    # Build the shading element.
    shading_elm = OxmlElement('w:shd')
    shading_elm.set(qn('w:val'), 'clear')
    shading_elm.set(qn('w:color'), 'auto')
    shading_elm.set(qn('w:fill'), color_code)  # Set fill color

    # Shading is a run property: it must sit inside <w:rPr>, not directly
    # under <w:r>.
    run._r.get_or_add_rPr().append(shading_elm)

    # Size only this run; writing to paragraph.style would change the shared
    # style and with it every other paragraph that uses that style.
    run.font.size = Pt(12)
Full code:
import tkinter as tk
from Bio.Seq import Seq
from Bio import pairwise2
from docx import Document
from docx.shared import Inches
from docx.shared import Pt
from docx.oxml.shared import OxmlElement
from docx.oxml.ns import qn
class HighlightedOutputText(tk.Text):
    """Text widget that shows where each input sequence maps on the reference.

    ``highlighted_sequences`` maps a sequence name to the slice of the
    reference sequence that was matched for it.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.highlighted_sequences = {}

    def map_and_highlight(self, seq_name, reference_sequence, input_sequence):
        """Locate ``input_sequence`` in the reference and display the result.

        On success the whole reference is inserted with the matched region
        tagged in the colour from ``get_highlight_color(seq_name)``, and the
        matched region is stored in ``highlighted_sequences[seq_name]``.
        Otherwise a "no matching region" message is inserted.
        """
        best_alignment = self.find_best_alignment(reference_sequence, input_sequence)
        self.insert(tk.END, f"\n{seq_name} Input sequence:\n{input_sequence}\n\n")

        if best_alignment is None:
            self.insert(tk.END, "\nNo matching region found in the reference sequence.\n", "default")
            return

        _, _, start_position, end_position = best_alignment

        # Configure highlight tag based on sequence name
        highlight_tag = f"{seq_name.lower()}_highlight"
        self.tag_configure(highlight_tag, background=get_highlight_color(seq_name))

        # Insert the reference in three pieces so only the match is tagged.
        self.insert(tk.END, "\nHighlighted sequence:\n")
        self.insert(tk.END, reference_sequence[:start_position], "default")
        self.insert(tk.END, reference_sequence[start_position:end_position], highlight_tag)
        self.insert(tk.END, reference_sequence[end_position:], "default")

        # Save highlighted sequence for later use
        self.highlighted_sequences[seq_name] = reference_sequence[start_position:end_position]

    def find_best_alignment(self, reference_sequence, input_sequence):
        """Find where ``input_sequence`` (or its reverse complement) sits.

        Returns:
            ``None`` when nothing can be aligned, otherwise a tuple
            ``(reference_aligned, input_aligned, start_position,
            end_position)``. The two positions index the original, ungapped
            ``reference_sequence`` (end exclusive). For an exact match the
            first two items are the plain sequences; for an approximate
            match they are the gapped strings from the best-scoring local
            alignment.
        """
        if not reference_sequence or not input_sequence:
            print(f"No alignment found for input sequence: {input_sequence}")
            return None

        # An exact substring match needs no alignment and cannot be shifted
        # by gap characters.
        start_position = reference_sequence.find(input_sequence)
        if start_position >= 0:
            return (reference_sequence, input_sequence,
                    start_position, start_position + len(input_sequence))

        reverse_complement = str(Seq(input_sequence).reverse_complement())
        start_position = reference_sequence.find(reverse_complement)
        if start_position >= 0:
            return (reference_sequence, reverse_complement,
                    start_position, start_position + len(reverse_complement))

        alignments = (
            pairwise2.align.localxx(reference_sequence, input_sequence)
            + pairwise2.align.localxx(reference_sequence, reverse_complement)
        )
        if not alignments:
            print(f"No alignment found for input sequence: {input_sequence}")
            return None

        # Choose the best alignment based on the score
        best_alignment = max(alignments, key=lambda a: a.score)
        reference_aligned = best_alignment.seqA
        input_aligned = best_alignment.seqB

        # start/end index the gapped alignment strings; subtract the gaps in
        # front of each to get coordinates in the ungapped reference.
        start_position = best_alignment.start - reference_aligned[:best_alignment.start].count('-')
        end_position = best_alignment.end - reference_aligned[:best_alignment.end].count('-')
        return reference_aligned, input_aligned, start_position, end_position

    def clear_output(self):
        """Remove all displayed text and forget previously matched regions."""
        self.delete("1.0", tk.END)
        self.highlighted_sequences = {}
def reverse_complement_if_needed(seq_name, input_sequence):
    """Return the reverse complement for B3, F1c, B2 and LF; otherwise the input unchanged."""
    names_to_reverse = ("B3", "F1c", "B2", "LF")
    if seq_name not in names_to_reverse:
        return input_sequence
    reversed_sequence = Seq(input_sequence).reverse_complement()
    return str(reversed_sequence)
def get_highlight_color(seq_name):
    """Return the ``#RRGGBB`` background colour used in the GUI for ``seq_name``.

    Names that are not listed fall back to orange (``#FFA500``).
    """
    known_colors = (
        ("F3", "#808080"),   # Gray
        ("B3", "#404040"),   # Dark Gray
        ("LF", "#008000"),   # Green
        ("LB", "#FF0000"),   # Red
        ("F2", "#40E0D0"),   # Turquoise
        ("F1c", "#FFFF00"),  # Yellow
        ("B2", "#FFC0CB"),   # Pink
        ("B1c", "#00008B"),  # Dark Blue
    )
    fallback = "#FFA500"  # Default color for other sequences
    return next(
        (color for name, color in known_colors if seq_name == name),
        fallback,
    )
def _split_reference_by_colour(reference_sequence, coloured_regions):
    """Split the reference into consecutive ``(text, colour)`` segments.

    ``coloured_regions`` is an iterable of ``(region, colour)`` pairs. Each
    region is located with ``str.find`` (first occurrence only); regions that
    are empty or not present in the reference are skipped. When regions
    overlap, the one listed later wins. Unmatched stretches get colour ``None``.
    """
    colours = [None] * len(reference_sequence)
    for region, colour in coloured_regions:
        if not region:
            continue
        start = reference_sequence.find(region)
        if start < 0:
            continue
        colours[start:start + len(region)] = [colour] * len(region)

    # Group neighbouring bases that share a colour so each group becomes one run.
    grouped = []
    for base, colour in zip(reference_sequence, colours):
        if grouped and grouped[-1][1] == colour:
            grouped[-1][0].append(base)
        else:
            grouped.append(([base], colour))
    return [("".join(bases), colour) for bases, colour in grouped]


def _add_shaded_run(paragraph, text, color_code=None):
    """Append ``text`` to ``paragraph`` as a run, shaded when a colour is given."""
    run = paragraph.add_run(text)
    if color_code:
        shading = OxmlElement('w:shd')
        shading.set(qn('w:val'), 'clear')
        shading.set(qn('w:color'), 'auto')
        shading.set(qn('w:fill'), color_code)
        # Shading is a run *property*, so it belongs inside <w:rPr>.
        run._r.get_or_add_rPr().append(shading)
    return run


def create_word_document(output_text, reference_sequence):
    """Write the reference sequence and a primer table to a Word document.

    Args:
        output_text: Object exposing ``highlighted_sequences``, a mapping of
            sequence name to the matched region of the reference.
        reference_sequence: The reference sequence as a string.

    The document contains the reference sequence with every matched region
    shaded in that primer's colour, followed by a two-column table (name,
    sequence). Table sequences are passed through
    ``reverse_complement_if_needed``. Directly after the B3 row, FIP
    (F1c-F2) and BIP (B1c-B2) rows are added when both of their components
    are available. The file is saved as ``LAMP Primer mapping.docx`` (a
    relative path).
    """
    document = Document()
    sequences = output_text.highlighted_sequences

    # Fill colours (hex RRGGBB) used for shading, keyed by sequence name.
    highlight_colors = {
        "F3": "FF0000",   # Red for F3
        "B3": "0000FF",   # Blue for B3
        "F2": "FFC0CB",   # Pink for F2
        "F1c": "0000FF",  # Blue for F1c
        "B2": "FFFF00",   # Yellow for B2
        "B1c": "00008B",  # Dark blue for B1c
        "LF": "FFA500",   # Orange for LF
        "LB": "800080",   # Purple for LB
        # Change colors as needed
    }

    # Reference sequence, one run per coloured / uncoloured stretch.
    # "Body Text" is the style's name; "BodyText" is only its id, and lookup
    # by id is deprecated in python-docx.
    document.add_heading("Reference Sequence", level=1)
    reference_paragraph = document.add_paragraph(style='Body Text')
    coloured_regions = [
        (region, highlight_colors[name])
        for name, region in sequences.items()
        if name in highlight_colors
    ]
    for text, color_code in _split_reference_by_colour(reference_sequence, coloured_regions):
        _add_shaded_run(reference_paragraph, text, color_code)

    # Table of sequences
    table = document.add_table(rows=1, cols=2, style='Table Grid')
    table.autofit = True
    table.cell(0, 0).text = "Sequence Name"
    table.cell(0, 1).text = "Highlighted Sequence"

    def add_row(label, sequence_text, color_code=None):
        cells = table.add_row().cells
        cells[0].text = label
        cells[1].text = sequence_text
        if color_code is not None:
            apply_shading_to_cell(cells[1], sequence_text, color_code)

    def primer_for(name):
        # Returns None instead of raising KeyError when a primer was not mapped.
        region = sequences.get(name)
        if region is None:
            return None
        return reverse_complement_if_needed(name, region)

    for seq_name, highlighted_sequence in sequences.items():
        add_row(
            f"{seq_name} Sequence",
            reverse_complement_if_needed(seq_name, highlighted_sequence),
            highlight_colors.get(seq_name),
        )

        # Add FIP and BIP sequences after B3
        if seq_name == "B3":
            for label, first_name, second_name in (
                ("FIP Sequence", "F1c", "F2"),
                ("BIP Sequence", "B1c", "B2"),
            ):
                # Every component goes through the same orientation rule as
                # its own table row, so B2 inside BIP matches the B2 row.
                first, second = primer_for(first_name), primer_for(second_name)
                if first is None or second is None:
                    continue
                add_row(label, f"{first}-{second}", highlight_colors.get(first_name))

    # Save the document
    document.save("LAMP Primer mapping.docx")
def apply_shading_to_cell(cell, text, color_code):
    """Replace the cell's content with ``text`` shaded in ``color_code``.

    Args:
        cell: python-docx table cell to overwrite.
        text: Text to place in the cell.
        color_code: Fill colour as a hex ``RRGGBB`` string.
    """
    # Clear existing text, then add the new text as a single run.
    cell.text = ''
    paragraph = cell.paragraphs[0]
    run = paragraph.add_run(text)

    # Build the shading element.
    shading_elm = OxmlElement('w:shd')
    shading_elm.set(qn('w:val'), 'clear')
    shading_elm.set(qn('w:color'), 'auto')
    shading_elm.set(qn('w:fill'), color_code)  # Set fill color

    # Shading is a run property: it must sit inside <w:rPr>, not directly
    # under <w:r>.
    run._r.get_or_add_rPr().append(shading_elm)

    # Size only this run; writing to paragraph.style would change the shared
    # style and with it every other paragraph that uses that style.
    run.font.size = Pt(12)
# GUI setup: build the main window, its widgets and callbacks, then run it.
root = tk.Tk()
root.title("Sequence Alignment")

# Reference sequence label and entry (placed on grid row 0 further down)
label_reference = tk.Label(root, text="Reference Sequence:")
entry_reference = tk.Entry(root, width=30)

# (name, entry widget) pairs for the primer inputs, plus the output pane
entries = []
output_text = HighlightedOutputText(root, height=30, width=40)
output_text.configure(font=("TkDefaultFont", 10))

# One labelled entry per sequence name, on grid rows 1, 2, 3, ...
for grid_row, primer_name in enumerate(
    ["F3", "B3", "F2", "F1c", "B2", "B1c", "LF", "LB"], start=1
):
    primer_label = tk.Label(root, text=f"{primer_name} Sequence:")
    primer_entry = tk.Entry(root, width=30)
    entries.append((primer_name, primer_entry))
    primer_label.grid(row=grid_row, column=0, padx=10, pady=5)
    primer_entry.grid(row=grid_row, column=1, padx=10, pady=5)


def on_align_button_click():
    """Map every entered sequence onto the reference and list the matches."""
    reference = entry_reference.get()
    output_text.clear_output()  # Clear previous output

    for name, widget in entries:
        oriented_sequence = reverse_complement_if_needed(name, widget.get())
        output_text.map_and_highlight(name, reference, oriented_sequence)

    # Display all highlighted sequences in the output window
    for name, matched_region in output_text.highlighted_sequences.items():
        output_text.insert(tk.END, f"\n{name} Input sequence:\n{matched_region}\n")


def _on_document_button_click():
    """Create the Word document from the current output and reference entry."""
    create_word_document(output_text, entry_reference.get())


# Button to trigger alignment
align_button = tk.Button(root, text="Align", command=on_align_button_click)

# Button to create a Word document
document_button = tk.Button(root, text="Create Word Document", command=_on_document_button_click)

# Grid layout: reference on top, buttons and output below the primer entries
entry_count = len(entries)
label_reference.grid(row=0, column=0, padx=10, pady=5)
entry_reference.grid(row=0, column=1, padx=10, pady=5)
align_button.grid(row=entry_count + 1, column=0, columnspan=2, pady=10)
document_button.grid(row=entry_count + 2, column=0, columnspan=2, pady=10)
output_text.grid(row=entry_count + 3, column=0, columnspan=2, padx=10, pady=5)

root.mainloop()