####
# Immunological Bioinformatics January 2019
# Exercise: Working with peptide data
# Leon Eyrich Jessen
####

# Getting started
# ----------------------------

# Install packages
# Each package is installed only when it is not already available, so
# re-running the script does not reinstall everything from scratch.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
if (!requireNamespace("PepTools2", quietly = TRUE)) {
  devtools::install_github("leonjessen/PepTools2")
}
if (!requireNamespace("ggseqlogo", quietly = TRUE)) {
  devtools::install_github("omarwagih/ggseqlogo")
}

# Load libraries
library("tidyverse")
library("PepTools2")
library("ggseqlogo")

# Sequence logos
# ----------------------------

# Logo of 1000 peptides of length 9 produced by pep_ran()
my_peps <- pep_ran(n = 1000, k = 9)
head(my_peps, 10)
ggseqlogo(my_peps)

# Q1: Take a look at the logo. Which peptide positions seem to be important?

# Logo of the PEPTIDES data set
# NOTE(review): PEPTIDES is presumably exported by PepTools2 - confirm.
head(PEPTIDES, 10)
ggseqlogo(PEPTIDES)

# Q2: Take a look at the logo. Which peptide positions seem to be important?

# Q3: Why are there no amino acids below zero?

# Logo drawn from the "KLD_logo_aa_height" element returned by
# kullback_leibler(); the matrix is transposed before being handed to
# ggseqlogo() as custom heights.
PEPTIDES %>%
  kullback_leibler(aa_background_distribution = "Eukaryota") %>%
  pluck("KLD_logo_aa_height") %>%
  t() %>%
  ggseqlogo(method = "custom")

# Q4: Why are there amino acids below zero?

pssm_counts(PEPTIDES)

# Q5: How many Leucines and Glycines are found at p2? Why?

pssm_freqs(PEPTIDES)

# Q6: What is the positional sum of the frequencies?

# From Protein to Prediction
# ----------------------------

# Read the first sequence of the fasta file and print it
my_seq <- read_fasta(file = "sequence.fasta")$sequence[1]
my_seq

# Chop the sequence into 9-mers; pep_chop() returns a list-like object, of
# which the first element is kept.
my_peps <- my_seq %>%
  pep_chop(9) %>%
  pluck(1)
head(my_peps, 10)

# Write one peptide per line, without a header. The output file is passed
# positionally because readr renamed this argument from `path` to `file`;
# the positional form works with either name.
write_tsv(tibble(peptide = my_peps), "my_peps.txt", col_names = FALSE)

# Read the prediction output, skipping its first line
my_preds <- read_tsv(file = "25127_NetMHCpan.xls", skip = 1)
my_preds %>%
  filter(Rank <= 2) %>%
  View()

# Q7: How many binders did you find?

# Q8: Which peptide is the predicted strongest binder?

my_preds %>%
  filter(Rank <= 2) %>%
  arrange(Rank) %>%
  View()

my_binders <- my_preds %>%
  filter(Rank <= 2) %>%
  pull(Peptide)

# Q9: What is the frequency of Valine at p9?

pssm_freqs(my_binders)

# The netTCR-1.0 Server
# ----------------------------

# Q10: How many binders did you find?

# NOTE(review): the file has a .csv extension but is read as tab-separated -
# confirm the delimiter of the downloaded results.
my_tcrs <- read_tsv(file = "results.csv")
my_tcrs %>%
  View()
my_tcrs %>%
  filter(Prediction >= 0.5) %>%
  nrow()

# Why 0.5? Thresholds are "tricky", AUC is threshold independent

# A bit of Biology
# ----------------------------

# Q11: Immunity to malaria is an antibody mediated acquired immunity - Why?

# Q12: How relevant is the HLA-A*02:01 allele if you wanted to create a
#      malaria vaccine?

# Optional
# ----------------------------

# ?