# Startup banner written to stdout: program name, authoring group, and citation URL.
print('\n'.join((
    '######################################################################################################',
    '############## You are using Modification and Annotation in Proteins (MAP) Script ####################',
    '##################### MAP Program, developed by Prof G. P. S. Raghava group. #########################',
    '############ Please cite: MAP; available at https://webs.iiitd.edu.in/raghava/maprepo/  ##############',
    '######################################################################################################',
)))
 

import streamlit as st
import re
from pathlib import Path

# Catalogue of residue-level modification categories offered in the UI.
#
# Structure:
#   category code -> {'name': label shown in the category selectbox,
#                     'options': {detail code -> human-readable description}}
#
# The category code becomes the tag prefix and the chosen detail code the tag
# value, i.e. "{code:detail}" (or just "{code}" when the detail is empty).
# An empty-string option key is rendered as "Select" in the option picker and
# yields a tag without a detail. Categories whose 'options' dict is empty get
# no option picker at all.
MODIFICATION_CATEGORIES = {
    'ptm': {
        'name': 'Post-Translational Modifications',
        'options': {
            'Phos': 'Phosphorylation',
            'Glyc': 'Glycosylation',
            'Ac': 'Acetylation',
            'Me': 'Methylation',
            'Ub': 'Ubiquitination',
            'Sumo': 'Sumoylation',
            'OH': 'Hydroxylation',
            'palm': 'Palmitoylation',
            '': 'Undefined PTM'
        }
    },
    'nnm': {
        'name': 'Non-Natural Modifications',
        'options': {
            'PEG': 'PEGylation',
            'Fluoro': 'Fluorination',
            'PMe': 'Phos-Methyl',
            'Biotin': 'Biotinylation',
            '': 'Undefined non-natural modification'
        }
    },
    'nnr': {
        'name': 'Non-Natural Residues',
        'options': {
            'Nle': 'Norleucine',
            'Hph': 'Homophenylalanine',
            'Can': 'Canavanine',
            'Orn': 'Ornithine',
            'Cit': 'Citrulline',
            'Har': 'Homoarginine',
            'Aze': 'Azetidine-2-carboxylic acid',
            'Bala': 'β-Alanine',
            'Aib': 'α-Aminoisobutyric acid',
            'Bpa': 'p-Benzoyl-L-phenylalanine',
            'Cha': 'Cyclohexylalanine',
            'Fpa': '4-Fluorophenylalanine',
            'Nal': '2-Naphthylalanine',
            '': 'Undefined non-natural residue'
        }
    },
    'iso': {
        'name': 'Isotopic & Fluorescent Labeling',
        'options': {
            '13C': 'Carbon-13 labeling',
            '15N': 'Nitrogen-15 labeling',
            '2H': 'Deuterium labeling',
            '18O': 'Oxygen-18 labeling',
            'Fluorescein': 'Fluorescein dye',
            'Cy5': 'Cyanine5 dye',
            'FITC': 'Fluorescein isothiocyanate',
            '': 'Undefined labeling'
        }
    },
    'IR': {
        'name': 'Interaction Residues',
        'options': {
            'DNA': 'DNA interaction',
            'RNA': 'RNA interaction',
            'Pro': 'Protein interaction',
            'ATP': 'ATP interaction',
            'GTP': 'GTP interaction',
            'UTP': 'UTP interaction',
            'NAD': 'NAD interaction',
            'FAD': 'FAD interaction',
            'FMN': 'FMN interaction',
            'Heme': 'Heme interaction',
            'B12': 'Vitamin B12 interaction',
            'CoA': 'Coenzyme A interaction',
            'Ca': 'Calcium interaction',
            'Zn': 'Zinc interaction',
            '': 'Undefined interaction'
        }
    },
    # No predefined options: the tag is emitted without a detail.
    'd': {
        'name': 'D-Amino Acids',
        'options': {}
    },
    # 'X-Y' and 'X-Y,Z-W' are placeholders: when one is picked, the UI asks for
    # the actual positions and uses those as the tag detail instead.
    'cyc': {
        'name': 'Cyclization',
        'options': {
            'N-C': 'Head-to-tail cyclization',
            'X-Y': 'Single disulfide bond between specified positions',
            'X-Y,Z-W': 'Multiple disulfide bonds',
            '': 'Undefined cyclization'
        }
    },
    'nt': {
        'name': 'N-Terminal Modifications',
        'options': {
            'Amid': 'Amidation',
            'Formyl': 'Formylation',
            'Acet': 'Acetylation',
            'Glyco': 'Glycosylation',
            'Me': 'Methylation',
            '': 'Undefined N-terminal modification'
        }
    },
    'ct': {
        'name': 'C-Terminal Modifications',
        'options': {
            'Amid': 'Amidation',
            'Glyco': 'Glycosylation',
            'Acet': 'Acetylation',
            '': 'Undefined C-terminal modification'
        }
    },
    # Mutations, insertions and deletions have no predefined options; their
    # detail is collected through a free-text input in the annotation form.
    'mut': {
        'name': 'Mutations',
        'options': {}
    },
    'ins': {
        'name': 'Insertions',
        'options': {}
    },
    'del': {
        'name': 'Deletions',
        'options': {}
    },
    'conj': {
        'name': 'Conjugation of Macromolecules',
        'options': {
            'Lipid': 'Lipid conjugation',
            'Mal': 'Maleimide conjugation',
            'DBCO': 'Dibenzocyclooctyne conjugation',
            '': 'Undefined conjugation'
        }
    }
}

def clean_sequence(sequence):
    """Upper-case the input and drop every whitespace and digit character."""
    uppercased = sequence.upper()
    noise = re.compile(r'[\s\d]')
    return noise.sub('', uppercased)

def get_modification_details(mod_type, seq_num):
    """Ask the user for the detail code of a modification category.

    Args:
        mod_type: Category code (a key of ``MODIFICATION_CATEGORIES``).
        seq_num: Sequence number, used to keep Streamlit widget keys unique.

    Returns:
        ``(mod_type, detail)``. ``detail`` is ``''`` for unknown categories,
        for categories without predefined options, and when disulfide
        positions fail validation. For the disulfide cyclization choices it
        is the position string typed by the user.
    """
    category = MODIFICATION_CATEGORIES.get(mod_type)
    # Unknown categories and categories without options need no picker.
    if category is None or not category['options']:
        return mod_type, ''

    choices = category['options']

    def describe(code):
        # The empty code is the "nothing chosen" entry.
        if not code:
            return "Select"
        return f"{code}: {choices[code]}"

    st.write(f"Select {category['name']} type:")
    mod_detail = st.selectbox(
        "Available options:",
        list(choices.keys()),
        format_func=describe,
        key=f"mod_detail_{mod_type}_{seq_num}",
    )

    if mod_type != 'cyc':
        return mod_type, mod_detail

    # Placeholder choices that require the user to type real positions:
    # choice -> (prompt, widget key, validation pattern, error message)
    position_prompts = {
        'X-Y': (
            "Enter disulfide bond positions (e.g., '3-4'):",
            f"cyc_positions_{seq_num}",
            r'^\d+-\d+$',
            "Invalid format. Please use format like '3-4'.",
        ),
        'X-Y,Z-W': (
            "Enter multiple disulfide bonds (e.g., '2-5,6-10'):",
            f"cyc_mult_positions_{seq_num}",
            r'^\d+-\d+(,\d+-\d+)*$',
            "Invalid format. Please use format like '2-5,6-10'.",
        ),
    }
    if mod_detail not in position_prompts:
        return mod_type, mod_detail

    prompt, widget_key, pattern, error_text = position_prompts[mod_detail]
    positions = st.text_input(prompt, "", key=widget_key)
    # An empty input is accepted as "not entered yet"; only malformed text errors.
    if positions and not re.match(pattern, positions):
        st.error(error_text)
        return mod_type, ''
    return mod_type, positions

def create_map_header(seq_num):
    """Collect protein-level annotations and build the MAP header line.

    Renders Streamlit inputs for organism, function and one optional extra
    annotation, then assembles the header.

    Args:
        seq_num: Sequence number; used for the ``>seq<N>`` identifier and to
            keep Streamlit widget keys unique.

    Returns:
        The header string: ``>seq<N>`` followed by one ``{key:value}`` block
        per non-empty annotation.
    """
    # Tag prefix used for each optional annotation type.
    annotation_prefixes = {
        "Subcellular location": "loc",
        "Source database": "src",
        "Sequence length": "len",
        "Experimental status": "exp",
        "Binding partner": "bind",
        "Target": "target",
        "Notes": "note"
    }

    header = f">seq{seq_num}"
    annotations = []

    # Organism
    org = st.text_input("Enter organism (e.g., 'Homo sapiens'):", "", key=f"org_{seq_num}")
    if org:
        annotations.append(f"{{org:{org}}}")

    # Function
    func = st.text_input("Enter function (e.g., 'Signal peptide'):", "", key=f"func_{seq_num}")
    if func:
        annotations.append(f"{{func:{func}}}")

    # Additional annotations
    annotation_type = st.selectbox(
        "Select annotation type:",
        ["None", "Subcellular location", "Source database", "Sequence length",
         "Experimental status", "Binding partner", "Target", "Notes"],
        key=f"annot_type_{seq_num}"
    )

    if annotation_type != "None":
        value = st.text_input(f"Enter {annotation_type.lower()}:", "", key=f"annot_value_{seq_num}")
        if value:
            annotations.append(f"{{{annotation_prefixes[annotation_type]}:{value}}}")

    # The header must be returned: callers display it and write it as the
    # first line of the downloadable file.
    # NOTE(review): annotations are appended space-separated after the
    # identifier; confirm the separator against the MAP format specification.
    if annotations:
        header = f"{header} {' '.join(annotations)}"
    return header
    
def _apply_modifications(sequence, modifications):
    """Splice modification tags into a residue sequence.

    Positions are tracked per original residue, so inserting a tag never
    shifts the location of any other modification.

    Args:
        sequence: Cleaned residue string.
        modifications: Iterable of dicts with the keys ``type``, ``position``
            (``'N-term'``, ``'C-term'`` or a 1-based residue number as a
            string), ``detail`` and ``tag``.

    Returns:
        ``(annotated, skipped)``: the tagged sequence string, and the list of
        modifications whose residue number lies outside ``sequence``.
    """
    def position_rank(mod):
        # N-terminus first, residues in ascending order, C-terminus last.
        if mod['position'] == 'N-term':
            return 0
        if mod['position'] == 'C-term':
            return float('inf')
        return int(mod['position'])

    residues = list(sequence)            # text emitted for each original residue
    trailing = [[] for _ in residues]    # tags emitted right after each residue
    n_term_tags, c_term_tags, cyc_tags, skipped = [], [], [], []

    # sorted() is stable, so modifications at the same position keep the
    # order in which the user added them.
    for mod in sorted(modifications, key=position_rank):
        tag = mod['tag']
        if mod['type'] == 'cyc':
            # Cyclization always goes at the very end, after C-terminal tags.
            cyc_tags.append(tag)
        elif mod['position'] == 'N-term':
            # Each new N-terminal tag is placed in front of earlier ones.
            n_term_tags.insert(0, tag)
        elif mod['position'] == 'C-term':
            c_term_tags.append(tag)
        else:
            index = int(mod['position']) - 1
            if not 0 <= index < len(residues):
                # Stored for a longer sequence than the current one.
                skipped.append(mod)
                continue
            if mod['type'] == 'mut':
                # A mutation replaces the residue with its tag.
                residues[index] = tag
            elif mod['type'] == 'del':
                # A deletion replaces the first residue with the tag and
                # blanks any further residues it covers.
                span = len(mod['detail']) if mod['detail'] else 1
                residues[index] = tag
                for covered in range(index + 1, min(index + span, len(residues))):
                    residues[covered] = ''
            else:
                # Insertions and all other modifications follow the residue.
                trailing[index].append(tag)

    body = ''.join(
        residue + ''.join(tags) for residue, tags in zip(residues, trailing)
    )
    annotated = ''.join(n_term_tags) + body + ''.join(c_term_tags) + ''.join(cyc_tags)
    return annotated, skipped


def annotate_residues(sequence, seq_num):
    """Render the residue-annotation UI and return the tagged sequence.

    Modifications are kept in ``st.session_state`` under
    ``modifications_<seq_num>`` so they survive Streamlit reruns.

    Args:
        sequence: Cleaned residue string to annotate.
        seq_num: Sequence number, used for the session-state key and to keep
            Streamlit widget keys unique.

    Returns:
        ``sequence`` with the tags of all stored modifications spliced in.
    """
    state_key = f'modifications_{seq_num}'

    st.write(f"Sequence: {sequence}")
    st.write("Positions: " + ' '.join(f"{i}:{aa}" for i, aa in enumerate(sequence, start=1)))

    # Initialize session state for modifications if not exists
    if state_key not in st.session_state:
        st.session_state[state_key] = []

    # Display current modifications
    if st.session_state[state_key]:
        st.write("Current modifications:")
        for mod in st.session_state[state_key]:
            st.write(f"- {mod['type']} at {mod['position']}: {mod['tag']}")

    # Modification form
    st.subheader("Add New Modification")
    mod_type = st.selectbox(
        "Modification category:",
        list(MODIFICATION_CATEGORIES.keys()),
        format_func=lambda x: MODIFICATION_CATEGORIES[x]['name'],
        key=f"mod_type_{seq_num}"
    )

    # Get position
    pos_options = [str(i) for i in range(1, len(sequence) + 1)] + ['N-term', 'C-term']
    pos = st.selectbox("Position to modify:", pos_options, key=f"pos_{seq_num}")

    # Numeric form of the position: 0 for the N-terminus, len + 1 for the
    # C-terminus. Stored alongside the label with each modification.
    if pos == 'N-term':
        actual_pos = 0
    elif pos == 'C-term':
        actual_pos = len(sequence) + 1
    else:
        actual_pos = int(pos)

    # Get modification details
    mod_type, mod_detail = get_modification_details(mod_type, seq_num)

    # For mutations, insertions, deletions - get additional details
    if mod_type == 'mut':
        mod_detail = st.text_input("Enter new residue(s) for mutation:", "", key=f"mut_detail_{seq_num}")
    elif mod_type == 'ins':
        mod_detail = st.text_input("Enter sequence to be inserted:", "", key=f"ins_detail_{seq_num}")
    elif mod_type == 'del':
        if pos in ('N-term', 'C-term'):
            mod_detail = ''  # Terminal deletions don't need detail
        else:
            mod_detail = st.text_input("Enter residue(s) to delete (leave empty to delete 1 residue):", "", key=f"del_detail_{seq_num}")

    # Create the tag
    tag = f"{{{mod_type}:{mod_detail}}}" if mod_detail else f"{{{mod_type}}}"

    if st.button("Add Modification", key=f"add_mod_{seq_num}"):
        st.session_state[state_key].append({
            'type': mod_type,
            'position': pos,
            'actual_pos': actual_pos,
            'detail': mod_detail,
            'tag': tag
        })
        st.rerun()

    # Apply all modifications to sequence
    annotated, skipped = _apply_modifications(sequence, st.session_state[state_key])
    if skipped:
        # Happens when the sequence was shortened after modifications were added.
        st.warning(
            f"{len(skipped)} modification(s) refer to positions outside the "
            "current sequence and were skipped."
        )

    # Clear modifications button
    if st.button("Clear All Modifications", key=f"clear_mods_{seq_num}"):
        st.session_state[state_key] = []
        st.rerun()

    return annotated

def main():
    """Run the Streamlit page: read a sequence, annotate it, show and offer the MAP output."""
    st.title("MAP Sequence Converter")
    st.write("Convert protein sequences to MAP format with annotations")

    # Raw user input; whitespace and digits are stripped before use.
    raw_sequence = st.text_area(
        "Paste your protein sequence here (without header):",
        "ACDEFGHIK",
        height=100,
        help="Enter a protein sequence in single-letter code format",
    )
    residues = clean_sequence(raw_sequence)

    # Nothing usable was entered: prompt the user and stop here.
    if not residues:
        st.warning("Please enter a valid protein sequence")
        return

    st.success(f"Sequence length: {len(residues)} residues")

    # Header widgets are rendered before the residue-annotation widgets.
    header_line = create_map_header(1)
    tagged_sequence = annotate_residues(residues, 1)

    # Show the two lines of the MAP record.
    st.subheader("MAP Format Output")
    st.code(header_line)
    st.code(tagged_sequence)

    # Offer the same two lines as a downloadable file.
    map_text = "{}\n{}".format(header_line, tagged_sequence)
    st.download_button(
        label="Download MAP File",
        data=map_text,
        file_name="sequence.map",
        mime="text/plain",
    )

# Build the page only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
