# Startup banner: five fixed lines written to stdout when the module is loaded.
print(
    '######################################################################################################',
    '############## You are using Modification and Annotation in Proteins (MAP) Script ####################',
    '##################### MAP Program, developed by Prof G. P. S. Raghava group. #########################',
    '############ Please cite: MAP; available at https://webs.iiitd.edu.in/raghava/maprepo/  ##############',
    '######################################################################################################',
    sep='\n',
)

import re
from pathlib import Path

# Modification options from the documentation.
#
# Maps each category code (the text that becomes the tag name, e.g. "{ptm:...}")
# to a display name and an 'options' dict of {detail code: description}.
#
# - Insertion order matters: annotate_residues() and get_modification_details()
#   number their menus by enumerating these dicts in order.
# - The '' option key is the "undefined" choice: it is listed as "Select" in the
#   menu and produces a tag without a detail part (e.g. "{ptm}").
# - A category with empty 'options' is never prompted for a sub-type by
#   get_modification_details().
MODIFICATION_CATEGORIES = {
    'ptm': {
        'name': 'Post-Translational Modifications',
        'options': {
            'Phos': 'Phosphorylation',
            'Glyc': 'Glycosylation',
            'Ac': 'Acetylation',
            'Me': 'Methylation',
            'Ub': 'Ubiquitination',
            'Sumo': 'Sumoylation',
            'OH': 'Hydroxylation',
            'palm': 'Palmitoylation',
            '': 'Undefined PTM'
        }
    },
    'nnm': {
        'name': 'Non-Natural Modifications',
        'options': {
            'PEG': 'PEGylation',
            'Fluoro': 'Fluorination',
            'PMe': 'Phos-Methyl',
            'Biotin': 'Biotinylation',
            '': 'Undefined non-natural modification'
        }
    },
    'nnr': {
        'name': 'Non-Natural Residues',
        'options': {
            'Nle': 'Norleucine',
            'Hph': 'Homophenylalanine',
            'Can': 'Canavanine',
            'Orn': 'Ornithine',
            'Cit': 'Citrulline',
            'Har': 'Homoarginine',
            'Aze': 'Azetidine-2-carboxylic acid',
            'Bala': 'β-Alanine',
            'Aib': 'α-Aminoisobutyric acid',
            'Bpa': 'p-Benzoyl-L-phenylalanine',
            'Cha': 'Cyclohexylalanine',
            'Fpa': '4-Fluorophenylalanine',
            'Nal': '2-Naphthylalanine',
            '': 'Undefined non-natural residue'
        }
    },
    'iso': {
        'name': 'Isotopic & Fluorescent Labeling',
        'options': {
            '13C': 'Carbon-13 labeling',
            '15N': 'Nitrogen-15 labeling',
            '2H': 'Deuterium labeling',
            '18O': 'Oxygen-18 labeling',
            'Fluorescein': 'Fluorescein dye',
            'Cy5': 'Cyanine5 dye',
            'FITC': 'Fluorescein isothiocyanate',
            '': 'Undefined labeling'
        }
    },
    # NOTE: this is the only category code written in upper case.
    'IR': {
        'name': 'Interaction Residues',
        'options': {
            'DNA': 'DNA interaction',
            'RNA': 'RNA interaction',
            'Pro': 'Protein interaction',
            'ATP': 'ATP interaction',
            'GTP': 'GTP interaction',
            'UTP': 'UTP interaction',
            'NAD': 'NAD interaction',
            'FAD': 'FAD interaction',
            'FMN': 'FMN interaction',
            'Heme': 'Heme interaction',
            'B12': 'Vitamin B12 interaction',
            'CoA': 'Coenzyme A interaction',
            'Ca': 'Calcium interaction',
            'Zn': 'Zinc interaction',
            '': 'Undefined interaction'
        }
    },
    # No sub-types: the tag is emitted without a detail part.
    'd': {
        'name': 'D-Amino Acids',
        'options': {}
    },
    # 'X-Y' and 'X-Y,Z-W' are placeholders: annotate_residues() replaces them
    # with the bond positions typed by the user.
    'cyc': {
        'name': 'Cyclization',
        'options': {
            'N-C': 'Head-to-tail cyclization',
            'X-Y': 'Single disulfide bond between specified positions',
            'X-Y,Z-W': 'Multiple disulfide bonds',
            '': 'Undefined cyclization'
        }
    },
    'nt': {
        'name': 'N-Terminal Modifications',
        'options': {
            'Amid': 'Amidation',
            'Formyl': 'Formylation',
            'Acet': 'Acetylation',
            'Glyco': 'Glycosylation',
            'Me': 'Methylation',
            '': 'Undefined N-terminal modification'
        }
    },
    'ct': {
        'name': 'C-Terminal Modifications',
        'options': {
            'Amid': 'Amidation',
            'Glyco': 'Glycosylation',
            'Acet': 'Acetylation',
            '': 'Undefined C-terminal modification'
        }
    },
    # 'mut', 'ins' and 'del' have no predefined options; annotate_residues()
    # asks for their detail as free text instead.
    'mut': {
        'name': 'Mutations',
        'options': {}
    },
    'ins': {
        'name': 'Insertions',
        'options': {}
    },
    'del': {
        'name': 'Deletions',
        'options': {}
    },
    'conj': {
        'name': 'Conjugation of Macromolecules',
        'options': {
            'Lipid': 'Lipid conjugation',
            'Mal': 'Maleimide conjugation',
            'DBCO': 'Dibenzocyclooctyne conjugation',
            '': 'Undefined conjugation'
        }
    }
}


def clean_sequence(sequence):
    """Return the sequence upper-cased, with all whitespace and digit characters removed."""
    uppercased = sequence.upper()
    unwanted = re.compile(r'[\s\d]')
    return unwanted.sub('', uppercased)

def get_modification_details(mod_type):
    """Ask the user which sub-type of a modification category to use.

    Args:
        mod_type: Category code, normally a key of MODIFICATION_CATEGORIES.

    Returns:
        A ``(mod_type, detail_code)`` tuple. ``detail_code`` is ``''`` when the
        category is unknown, has no options, or the user's entry is not a
        number shown in the menu.
    """
    if mod_type not in MODIFICATION_CATEGORIES:
        return mod_type, ''

    category = MODIFICATION_CATEGORIES[mod_type]
    if not category['options']:
        return mod_type, ''

    print(f"\nSelect {category['name']} type:")
    options = list(category['options'].items())
    for i, (code, desc) in enumerate(options, 1):
        print(f"{i}. {code}: {desc}" if code else f"{i}. Select")

    choice = input("Enter your choice (number): ")
    try:
        number = int(choice)
    except ValueError:
        number = 0

    # Check the range explicitly: plain indexing would let 0 and negative
    # numbers silently select an option counted from the end of the menu.
    if 1 <= number <= len(options):
        return mod_type, options[number - 1][0]

    print("Invalid selection. Using default.")
    return mod_type, ''

def create_map_header():
    """Interactively build the MAP header line with protein-level annotations.

    Prompts for an organism, a function and at most one additional annotation.
    Every non-empty answer is added as a ``{key:value}`` annotation.

    Returns:
        The header string: ``">seq1"`` followed by the annotations, separated
        by single spaces.
    """
    header = ">seq1"
    annotations = []

    # Organism
    org = input("Enter organism (e.g., 'Homo sapiens', leave empty to skip): ")
    if org:
        annotations.append(f"{{org:{org}}}")

    # Function
    func = input("Enter function (e.g., 'Signal peptide', leave empty to skip): ")
    if func:
        annotations.append(f"{{func:{func}}}")

    # Additional annotations: (menu label, tag prefix), in menu order.
    annotation_types = (
        ("Subcellular location", "loc"),
        ("Source database", "src"),
        ("Sequence length", "len"),
        ("Experimental status", "exp"),
        ("Binding partner", "bind"),
        ("Target", "target"),
        ("Notes", "note"),
    )
    print("\nAdditional annotation types:")
    for i, (label, _prefix) in enumerate(annotation_types, 1):
        print(f"{i}. {label}")

    choice = input("Select annotation type (number, 0 for None): ")
    try:
        number = int(choice)
    except ValueError:
        number = -1  # not a number: reported as invalid below

    # Compare the parsed number rather than the raw text, so " 0" or "00"
    # mean "None" too, and negative numbers cannot index from the end.
    if number == 0:
        pass
    elif 1 <= number <= len(annotation_types):
        label, prefix = annotation_types[number - 1]
        value = input(f"Enter {label.lower()}: ")
        if value:
            annotations.append(f"{{{prefix}:{value}}}")
    else:
        print("Invalid selection. Skipping annotation.")

    return ' '.join([header] + annotations)

def _parse_menu_choice(raw, upper, lower=1):
    """Return ``raw`` as an int within ``lower..upper`` (inclusive), else None."""
    try:
        number = int(raw)
    except ValueError:
        return None
    return number if lower <= number <= upper else None


def _apply_modifications(sequence, modifications):
    """Return ``sequence`` with every modification tag spliced into place."""
    seq_len = len(sequence)
    # Placement is tracked per original residue, so earlier insertions never
    # shift the index of later ones and no running offset is needed.
    residues = list(sequence)           # text emitted in place of each residue
    trailing = [[] for _ in residues]   # tags emitted right after each residue
    n_term_tags = []
    c_term_tags = []
    cyc_tags = []

    # sorted() is stable, so tags at the same position keep their entry order.
    for mod in sorted(modifications, key=lambda m: m['actual_pos']):
        tag = mod['tag']
        position = mod['actual_pos']
        if mod['type'] == 'cyc':
            # Cyclization always goes at the very end, after C-terminal tags.
            cyc_tags.append(tag)
        elif position == 0:
            # Front insertion: a later N-terminal tag precedes earlier ones.
            n_term_tags.insert(0, tag)
        elif position == seq_len + 1:
            c_term_tags.append(tag)
        else:
            index = position - 1
            if mod['type'] == 'mut':
                # A mutation replaces the residue with its tag.
                residues[index] = tag
            elif mod['type'] == 'del':
                # A deletion replaces the first residue with the tag and drops
                # the other residues it covers (one per character of 'detail').
                residues[index] = tag
                last = min(index + len(mod['detail']), seq_len)
                for covered in range(index + 1, last):
                    residues[covered] = ''
            else:
                # Insertions and all other tags follow the residue.
                trailing[index].append(tag)

    parts = list(n_term_tags)
    for residue, tags in zip(residues, trailing):
        parts.append(residue)
        parts.extend(tags)
    parts.extend(c_term_tags)
    parts.extend(cyc_tags)
    return ''.join(parts)


def annotate_residues(sequence):
    """Interactively collect residue-level modifications and apply them.

    The user repeatedly picks a modification category, a position (a residue
    number, 'N-term' or 'C-term') and any detail the category needs, until
    choosing 0. Entries that are not a number shown in the menu are rejected
    and the menu is shown again.

    Args:
        sequence: Cleaned protein sequence, one character per residue.

    Returns:
        The sequence with ``{type}`` / ``{type:detail}`` tags embedded.
    """
    print(f"\nSequence: {sequence}")
    print("Positions: " + ' '.join(f"{i}:{aa}" for i, aa in enumerate(sequence, 1)))

    category_codes = list(MODIFICATION_CATEGORIES)
    pos_options = [str(i) for i in range(1, len(sequence) + 1)] + ['N-term', 'C-term']
    modifications = []

    while True:
        print("\nCurrent modifications:")
        for mod in modifications:
            print(f"- {mod['type']} at {mod['position']}: {mod['tag']}")

        print("\nModification options:")
        for i, (code, cat) in enumerate(MODIFICATION_CATEGORIES.items(), 1):
            print(f"{i}. {cat['name']} ({code})")
        print("0. Done with modifications")

        choice = input("Select modification category (number): ")
        category_number = _parse_menu_choice(choice, len(category_codes), lower=0)
        if category_number == 0:
            break
        if category_number is None:
            print("Invalid selection. Please try again.")
            continue
        mod_type = category_codes[category_number - 1]

        # Get position
        print("\nSelect position to modify:")
        for i, pos in enumerate(pos_options, 1):
            print(f"{i}. {pos}")

        pos_choice = input("Enter position choice (number): ")
        pos_number = _parse_menu_choice(pos_choice, len(pos_options))
        if pos_number is None:
            print("Invalid position. Please try again.")
            continue
        pos = pos_options[pos_number - 1]

        # Termini are encoded as 0 (N-term) and len(sequence) + 1 (C-term).
        if pos == 'N-term':
            actual_pos = 0
        elif pos == 'C-term':
            actual_pos = len(sequence) + 1
        else:
            actual_pos = int(pos)

        # Get modification details
        mod_type, mod_detail = get_modification_details(mod_type)

        # For mutations, insertions, deletions - get additional details
        if mod_type == 'mut':
            mod_detail = input("Enter new residue(s) for mutation: ")
        elif mod_type == 'ins':
            mod_detail = input("Enter sequence to be inserted: ")
        elif mod_type == 'del':
            if actual_pos == 0 or actual_pos == len(sequence) + 1:
                mod_detail = ''  # Terminal deletions don't need detail
            else:
                mod_detail = input("Enter residue(s) to delete (leave empty to delete 1 residue): ")

        # Special handling for cyclization: swap the placeholder option for
        # the actual bond positions.
        if mod_type == 'cyc':
            if mod_detail == 'X-Y':
                positions = input("Enter disulfide bond positions (e.g., '3-4'): ")
                if positions and not re.match(r'^\d+-\d+$', positions):
                    print("Invalid format. Please use format like '3-4'.")
                    continue
                mod_detail = positions
            elif mod_detail == 'X-Y,Z-W':
                positions = input("Enter multiple disulfide bonds (e.g., '2-5,6-10'): ")
                if positions and not re.match(r'^\d+-\d+(,\d+-\d+)*$', positions):
                    print("Invalid format. Please use format like '2-5,6-10'.")
                    continue
                mod_detail = positions

        # Create the tag
        tag = f"{{{mod_type}:{mod_detail}}}" if mod_detail else f"{{{mod_type}}}"

        modifications.append({
            'type': mod_type,
            'position': pos,
            'actual_pos': actual_pos,
            'detail': mod_detail,
            'tag': tag
        })

    return _apply_modifications(sequence, modifications)

def main():
    """Run the interactive MAP converter.

    Reads a protein sequence (defaulting to ACDEFGHIK), collects header and
    residue annotations, prints the MAP-format result, and optionally saves
    it to a file. A failed save is reported instead of raising.
    """
    print("MAP Sequence Converter")
    print("Convert protein sequences to MAP format with annotations\n")

    # Sequence input
    sequence_input = input("Paste your protein sequence here (without header) [default: ACDEFGHIK]: ") or "ACDEFGHIK"
    cleaned_sequence = clean_sequence(sequence_input)

    if not cleaned_sequence:
        print("Error: Please enter a valid protein sequence")
        return

    print(f"\nSequence length: {len(cleaned_sequence)} residues")

    # Process the sequence
    header = create_map_header()
    annotated_seq = annotate_residues(cleaned_sequence)

    # Display output
    print("\nMAP Format Output:")
    print(header)
    print(annotated_seq)

    # Save to file option
    save = input("\nWould you like to save to a file? (y/n): ").strip().lower()
    if save != 'y':
        return

    filename = input("Enter filename (default: sequence.map): ").strip() or "sequence.map"
    try:
        # Explicit UTF-8 so non-ASCII annotation text is written the same way
        # regardless of the platform's locale encoding.
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(f"{header}\n{annotated_seq}")
    except OSError as err:
        # The result was already printed above, so report the failure
        # rather than ending the session with a traceback.
        print(f"Error: could not save {filename}: {err}")
    else:
        print(f"File saved as {filename}")


if __name__ == "__main__":
    main()