SHARE
TWEET

PTM confidence Proline

a guest Apr 25th, 2019 84 in 333 days
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. package fr.proline.core.algo.msi
  2.  
  3. import scala.collection.JavaConversions.collectionAsScalaIterable
  4. import scala.util.control.Breaks.break
  5. import scala.util.control.Breaks.breakable
  6. import fr.proline.context.IExecutionContext
  7. import fr.profi.util.regex.RegexUtils._
  8. import com.typesafe.scalalogging.LazyLogging
  9. import javax.persistence.EntityManager
  10. import scala.collection.mutable.ArrayBuffer
  11. import fr.proline.core.om.model.msi.ResultSummary
  12. import fr.proline.core.om.model.msi.PtmSite
  13. import fr.proline.core.om.model.msi.ProteinMatch
  14. import fr.proline.core.om.model.msi.SequenceMatch
  15. import fr.proline.core.om.model.msi.IPtmSpecificity
  16. import fr.proline.core.om.model.msi.PeptideInstance
  17. import fr.proline.core.om.model.msi.PtmDefinition
  18. import fr.proline.core.om.model.msi.LocatedPtm
  19. import fr.proline.core.om.model.msi.PeptideMatch
  20. import fr.proline.core.om.model.msi.PtmSite
  21. import fr.proline.core.om.model.msi.PtmLocation
  22.  
  23. case class PeptideInstancePtm(peptideInstance: PeptideInstance, ptm: LocatedPtm)
  24.  
  25. case class PtmSite(
  26.   val proteinMatchId: Long,
  27.   // ptm definition
  28.   val ptmDefinitionId: Long,
  29.   // position of the ptm site on the protein sequence
  30.   val seqPosition: Int,
  31.   // best (higher ptm probability) peptide match for this site
  32.   val bestPeptideMatchId: Long,
  33.   // The localization probability (percentage between 0 and 100)
  34.   val localizationConfidence: Float,
  35.   // map of peptide Ids matching that site, organized by position of the modification on the peptide sequence
  36.   val peptideIdsByPtmPosition: Map[Int, Array[Long]],
  37.   // array of matching peptide instances in leaf RSM
  38.   val peptideInstanceIds: Array[Long],
  39.   // array of peptide instances having the same sequence and modification of a matching peptide, but with their ptm located at another position
  40.   // those peptide instance did not match the ptm site, but they can confuse the quantification process
  41.   val isomericPeptideInstanceIds: Array[Long]
  42. ) {
  43.  
  44. }
  45.  
  46.  
  47. /**
  48.  * Determine PTMs site modifications
  49.  *
  50.  */
  51. class PtmSitesIdentifier() extends LazyLogging {
  52.  
  53.   /**
  54.    *
  55.    */
  56.   def identifyPtmSites(rsm: ResultSummary, proteinMatches: Array[ProteinMatch]): Iterable[PtmSite] = {
  57.  
  58.     val proteinMatchesById = proteinMatches.map { pm => pm.id -> pm }.toMap
  59.     val validatedProteinMatchesById = scala.collection.immutable.HashSet(rsm.getValidatedResultSet().get.proteinMatches.map(_.id): _*)
  60.          
  61.     val ptmSites = ArrayBuffer.empty[PtmSite]
  62.    
  63.     for (peptideSet <- rsm.peptideSets) {
  64.         for (proteinMatchId <- peptideSet.proteinMatchIds) {
  65.           // test if that proteinMatch is member of a validated protein sets
  66.           if (validatedProteinMatchesById.contains(proteinMatchId)) {
  67.  
  68.           def isModificationProbabilityDefined(pm: PeptideMatch, ptm: LocatedPtm): Boolean = {
  69.             // VDS : Correct Code
  70.             //          val result = (pm.properties.isDefined &&
  71.             //           pm.properties.get.ptmSiteProperties.isDefined &&
  72.             //           pm.properties.get.ptmSiteProperties.get.getMascotProbabilityBySite.isDefined &&
  73.             //           pm.properties.get.ptmSiteProperties.get.getMascotProbabilityBySite.get.contains(ptm.toReadableString()))
  74.             // VDS Workaround test for issue #16643
  75.             var result = false;
  76.             if (pm.properties.isDefined && pm.properties.get.ptmSiteProperties.isDefined && pm.properties.get.ptmSiteProperties.get.getMascotProbabilityBySite.isDefined) {
  77.               if (pm.properties.get.ptmSiteProperties.get.getMascotProbabilityBySite.get.contains(ptm.toReadableString()))
  78.                 result = true;
  79.               else {
  80.                 result = pm.properties.get.ptmSiteProperties.get.getMascotProbabilityBySite.get.contains(toOtherReadableString(ptm))
  81.               }
  82.             }
  83.             result
  84.           }
  85.  
  86.           val sequenceMatchesByPeptideId: Map[Long, SequenceMatch] = proteinMatchesById(proteinMatchId).sequenceMatches.map { sm => (sm.getPeptideId() -> sm) }.toMap
  87.           val proteinMatchSites = scala.collection.mutable.Map[(PtmDefinition, Int), ArrayBuffer[PeptideInstancePtm]]()
  88.           val peptideInstanceIdsBySeqPtm = scala.collection.mutable.Map[String, ArrayBuffer[Long]]()
  89.           val peptideInstancesById = peptideSet.getPeptideInstances().map(pi => (pi.id -> pi)).toMap
  90.  
  91.           for (peptideInstance <- peptideSet.getPeptideInstances().filter(!_.peptide.ptms.isEmpty)) {
  92.             val key = _getKey(peptideInstance)
  93.             peptideInstanceIdsBySeqPtm.getOrElseUpdate(key, ArrayBuffer.empty[Long]) += peptideInstance.id
  94.  
  95.             val seqMatch = sequenceMatchesByPeptideId(peptideInstance.peptide.id)
  96.             for (ptm <- peptideInstance.peptide.ptms) {
  97.               if (isModificationProbabilityDefined(peptideInstance.peptideMatches.head, ptm)) {
  98.                 proteinMatchSites.getOrElseUpdate((ptm.definition, ptm.seqPosition + seqMatch.start - 1), ArrayBuffer.empty[PeptideInstancePtm]) += PeptideInstancePtm(peptideInstance, ptm)
  99.               }
  100.             }
  101.           }
  102.  
  103.           val site = proteinMatchSites.map {
  104.             case (k, peptideInstances) =>
  105.  
  106.               def modificationProbability(pm: PeptideMatch, ptm: LocatedPtm): Float = {
  107.                 //  VDS Workaround test for issue #16643
  108.                 val f = if (pm.properties.get.ptmSiteProperties.get.getMascotProbabilityBySite.get.contains(ptm.toReadableString())) {
  109.                   pm.properties.get.ptmSiteProperties.get.getMascotProbabilityBySite.get(ptm.toReadableString())
  110.                 } else {
  111.                   pm.properties.get.ptmSiteProperties.get.getMascotProbabilityBySite.get(toOtherReadableString(ptm))
  112.                 }
  113.                 f
  114.                 //VDS : Correct Code
  115.                 //             pm.properties.get.ptmSiteProperties.get.getMascotProbabilityBySite.get(ptm.toReadableString())              
  116.               }
  117.  
  118.               // -- Search for the best PeptideMatch        
  119.               //  Should order by score before getting max value. maxBy don't respect "first for equal order" !
  120.               val bestPMs = peptideInstances.map(t =>
  121.                 {
  122.                   var bestProba: Float = 0.00f;
  123.                   var bestPM: PeptideMatch = null;
  124.                   val sortedPepMatches: Array[PeptideMatch] = t.peptideInstance.peptideMatches.sortBy(_.score).reverse
  125.                   sortedPepMatches.foreach { pepM =>
  126.                     val proba = modificationProbability(pepM, t.ptm);
  127.                     if (proba > bestProba) {
  128.                       bestPM = pepM
  129.                       bestProba = proba
  130.                     }
  131.                   }
  132.                   (bestPM -> t.ptm)
  133.                 });
  134.  
  135.               var bestPeptideMatch: PeptideMatch = null
  136.               var bestProba: Float = 0.00f;
  137.               val sortedBestPMs = bestPMs.sortBy(_._1.score).reverse
  138.               sortedBestPMs.foreach(f => {
  139.                 val proba = modificationProbability(f._1, f._2);
  140.                 if (proba > bestProba) {
  141.                   bestPeptideMatch = f._1
  142.                   bestProba = proba
  143.                 }
  144.               })
  145.  
  146.               val isomericPeptideInstanceIds = peptideInstances.flatMap(piptm => peptideInstanceIdsBySeqPtm(_getKey(piptm.peptideInstance))).distinct
  147.               isomericPeptideInstanceIds --= peptideInstances.map(_.peptideInstance.id)
  148.               val isomericPeptideInstances = isomericPeptideInstanceIds.map(id => peptideInstancesById(id))
  149.  
  150.               //            val peptideMatchesSeq = peptideInstances.map(_.peptideInstance.peptide.sequence).toArray
  151.               //            val isomericPeptideMatchesSeq = isomericPeptideInstances.map(_.peptide.sequence).toArray
  152.               //            println(proteinMatchesById(proteinMatchId).accession + ", "+k._2+", "+k._1.toReadableString()+", matches = "+peptideMatchesSeq.mkString(",")+"("+peptideInstances.map(_.peptideInstance.id).toArray.mkString(",")+")"+", isomeric matches = "+isomericPeptideMatchesSeq.mkString(",")+"("+isomericPeptideInstances.map(_.id).toArray.mkString(",")+")")
  153.  
  154.               val peptideIdsBySeqPosition = peptideInstances.groupBy(_.ptm.seqPosition).mapValues(_.map(_.peptideInstance.peptide.id).toArray)
  155.  
  156.               PtmSite(
  157.                 proteinMatchId = proteinMatchId,
  158.                 ptmDefinitionId = k._1.id,
  159.                 seqPosition = k._2,
  160.                 bestPeptideMatchId = bestPeptideMatch.id,
  161.                 localizationConfidence = bestProba,
  162.                 peptideIdsByPtmPosition = peptideIdsBySeqPosition,
  163.                 peptideInstanceIds = peptideInstances.map(_.peptideInstance.id).toArray,
  164.                 isomericPeptideInstanceIds = isomericPeptideInstances.map(_.id).toArray)
  165.           }
  166.           ptmSites ++= site
  167.         }
  168.       }
  169.     }
  170.     logger.info(ptmSites.size + " Ptm sites identified")
  171.     ptmSites
  172.   }
  173.  
  174.   def aggregatePtmSites(childrenSites: Array[Iterable[PtmSite]], sitesProteinMatches: Array[ProteinMatch], parentProteinMatches: Array[ProteinMatch], pmScoreProvider: (Array[Long]) => Map[Long, Double]): Iterable[PtmSite] = {
  175.     val proteinAccessionByProteinMatchId = sitesProteinMatches.map { pm => pm.id -> pm.accession }.toMap
  176.     val proteinMatchesByAccession = parentProteinMatches.map { pm => pm.accession -> pm }.toMap
  177.     val ptmSites = ArrayBuffer.empty[PtmSite]
  178.  
  179.     val ptmSitesMap = scala.collection.mutable.Map[(String, Long, Int), ArrayBuffer[PtmSite]]()
  180.  
  181.     for (sites <- childrenSites) {
  182.       sites.foreach { site =>
  183.         ptmSitesMap.getOrElseUpdate((proteinAccessionByProteinMatchId(site.proteinMatchId), site.ptmDefinitionId, site.seqPosition), ArrayBuffer.empty[PtmSite]) += site
  184.       }
  185.     }
  186.  
  187.     ptmSitesMap.foreach {
  188.       case (key, value) =>
  189.         val peptideMap = value.map(_.peptideIdsByPtmPosition).flatten
  190.         val newPeptideMap = peptideMap.groupBy(_._1).map { case (k, v) => k -> v.map(_._2).flatten.distinct.toArray }
  191.         val bestProbabilities = value.map(_.bestPeptideMatchId) zip value.map(_.localizationConfidence)
  192.         // TODO : need to retrieve bestProbabilities._1 peptideMatches to determine their identification score
  193.         // and choose the right "best" PSM
  194.         val pmScoresById = pmScoreProvider(bestProbabilities.map(_._1).toArray)
  195.         val newBestPTMProbability = bestProbabilities.maxBy(_._2)._2
  196.         val newBestPeptideMatchId = bestProbabilities.filter(_._2 >= newBestPTMProbability).map(x => (x._1 , pmScoresById(x._1))).maxBy(_._2)._1
  197.        
  198.         val newPeptideInstanceIds = value.map(_.peptideInstanceIds).flatten.distinct
  199.         val newIsomericPeptideInstanceIds = value.map(_.isomericPeptideInstanceIds).flatten.distinct
  200.  
  201.         val newSite = PtmSite(proteinMatchId = proteinMatchesByAccession(key._1).id,
  202.           ptmDefinitionId = key._2,
  203.           seqPosition = key._3,
  204.           bestPeptideMatchId = newBestPeptideMatchId,
  205.           localizationConfidence = newBestPTMProbability,
  206.           peptideIdsByPtmPosition = newPeptideMap,
  207.           peptideInstanceIds = newPeptideInstanceIds.toArray,
  208.           isomericPeptideInstanceIds = newIsomericPeptideInstanceIds.toArray)
  209.         ptmSites += newSite
  210.     }
  211.     logger.info(ptmSites.size + " Ptm sites identified")
  212.     ptmSites
  213.   }
  214.  
  215.   /*
  216.      * VDS Workaround test for issue #16643  
  217.      */
  218.   private def toOtherReadableString(ptm: LocatedPtm) = {
  219.     val ptmDef = ptm.definition
  220.     val shortName = ptmDef.names.shortName
  221.  
  222.     val ptmConstraint = if (ptm.isNTerm || ptm.isCTerm) {
  223.       val loc = PtmLocation.withName(ptmDef.location)
  224.       var otherLoc: String = ""
  225.       loc match {
  226.         case PtmLocation.ANY_C_TERM  => otherLoc = PtmLocation.PROT_C_TERM.toString()
  227.         case PtmLocation.PROT_C_TERM => otherLoc = PtmLocation.ANY_C_TERM.toString()
  228.         case PtmLocation.ANY_N_TERM  => otherLoc = PtmLocation.PROT_N_TERM.toString()
  229.         case PtmLocation.PROT_N_TERM => otherLoc = PtmLocation.ANY_N_TERM.toString()
  230.       }
  231.       otherLoc
  232.  
  233.     } else "" + ptmDef.residue + ptm.seqPosition
  234.  
  235.     s"${shortName} (${ptmConstraint})"
  236.   }
  237.  
  238.   /*
  239.    * get a key for the given PeptideInstance based on sequence and ptms definition sorted by name. This means that to peptide instances with
  240.    * same sequence and a same modification located at a different position get the same key.
  241.    */
  242.   private def _getKey(peptideInstance: PeptideInstance): String = {
  243.     peptideInstance.peptide.sequence + peptideInstance.peptide.ptms.map(_.definition.toReadableString()).sorted.mkString
  244.   }
  245.  
  246. }
  247.  
  248. package fr.proline.core.algo.msi.scoring
  249. import fr.proline.core.om.model.msi.PeptideMatch
  250. import scala.collection.mutable.HashMap
  251.  
  252. object PTMLocConfidence {
  253.  
  254.   def computeLocalisationConfidences(peptideMatches:Seq[PeptideMatch]) : Map[PeptideMatch, Float] = {
  255.     val confidences = new HashMap[PeptideMatch, Float]()
  256.         //TODO : pourquoi pas de filtre sur les PepMatch a PTM vs sans PTM
  257.     val peptideMatchesByQueryId = peptideMatches.groupBy(_.msQueryId)
  258.     for ((queryId, peptideMatches) <- peptideMatchesByQueryId) {
  259.       val matches = peptideMatches.sortWith( _.score > _.score)
  260.       val assignments = matches.filter( p => p.peptide.sequence == matches(0).peptide.sequence)
  261.       if (assignments.length > 1) {
  262.         confidences ++= _computeRelativeProbabilities(assignments)
  263.       }
  264.     }
  265.     confidences.toMap
  266.   }
  267.  
  268.   def _computeRelativeProbabilities(assignments:Seq[PeptideMatch]) : Map[PeptideMatch, Float] = {
  269.     val confidences = new HashMap[PeptideMatch, Float]()
  270.     val _r = (s1:Float, si:Float, md10Prob:Float) => scala.math.pow(10,md10Prob*(s1 - si)).toFloat
  271.     var sum = 0.0f
  272.     val s1 = assignments(0).score
  273.     for (peptideMatch <- assignments) {
  274.       val v = _r( -s1 , -peptideMatch.score , 0.1f )
  275.       sum += v
  276.       confidences(peptideMatch) = v
  277.     }
  278.     for ((pm, p) <- confidences) {
  279.       confidences(pm) = p/sum
  280.     }
  281.     confidences.toMap        
  282.   }
  283.  
  284. }
  285.  
  286. package fr.proline.core.algo.msi.scoring
  287. import fr.proline.core.om.model.msi.PeptideMatch
  288. import scala.collection.mutable.HashMap
  289.  
  290. object PTMLocConfidence {
  291.  
  292.   def computeLocalisationConfidences(peptideMatches:Seq[PeptideMatch]) : Map[PeptideMatch, Float] = {
  293.     val confidences = new HashMap[PeptideMatch, Float]()
  294.         //TODO : pourquoi pas de filtre sur les PepMatch a PTM vs sans PTM
  295.     val peptideMatchesByQueryId = peptideMatches.groupBy(_.msQueryId)
  296.     for ((queryId, peptideMatches) <- peptideMatchesByQueryId) {
  297.       val matches = peptideMatches.sortWith( _.score > _.score)
  298.       val assignments = matches.filter( p => p.peptide.sequence == matches(0).peptide.sequence)
  299.       if (assignments.length > 1) {
  300.         confidences ++= _computeRelativeProbabilities(assignments)
  301.       }
  302.     }
  303.     confidences.toMap
  304.   }
  305.  
  306.   def _computeRelativeProbabilities(assignments:Seq[PeptideMatch]) : Map[PeptideMatch, Float] = {
  307.     val confidences = new HashMap[PeptideMatch, Float]()
  308.     val _r = (s1:Float, si:Float, md10Prob:Float) => scala.math.pow(10,md10Prob*(s1 - si)).toFloat
  309.     var sum = 0.0f
  310.     val s1 = assignments(0).score
  311.     for (peptideMatch <- assignments) {
  312.       val v = _r( -s1 , -peptideMatch.score , 0.1f )
  313.       sum += v
  314.       confidences(peptideMatch) = v
  315.     }
  316.     for ((pm, p) <- confidences) {
  317.       confidences(pm) = p/sum
  318.     }
  319.     confidences.toMap        
  320.   }
  321.  
  322. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top