Usages of

org.yeastrc.fasta.FASTAReader.getInstance(?)
/**
 * Reads every entry of a FASTA file into a map.
 *
 * <p>The key of each mapping is the entry's header line with its first
 * character removed (presumably the leading '>' marker of a FASTA header);
 * the value wraps the entry's sequence. If two entries produce the same key,
 * the later one replaces the earlier one.
 *
 * @param fastaFile the FASTA file to read
 * @return a map from header (without its first character) to the sequences of that entry
 * @throws Exception if the file cannot be opened or parsed, or the reader cannot be closed
 */
public static Map<String, ProteinSequences> readFile(File fastaFile) throws Exception {
    Map<String, ProteinSequences> sequences = new HashMap<String, ProteinSequences>();

    FASTAReader reader = FASTAReader.getInstance(fastaFile.getAbsolutePath());
    try {
        for (FASTAEntry entry = reader.readNext(); entry != null; entry = reader.readNext()) {
            // substring(1) strips the first character of the header line
            String key = entry.getHeaderLine().substring(1);
            sequences.put(key, new ProteinSequences(entry.getSequence()));
        }
    } finally {
        // always release the underlying file, even when parsing fails half-way
        reader.close();
    }

    return sequences;
}
public static FASTAReader getFASTAReader(File fastaDoc) throws Exception { CompressedFileFilter filter = new CompressedFileFilter(); if(filter.accept(fastaDoc)) { // compressed return FASTAReader.getInstance(new BufferedInputStream(new GZIPInputStream(new BufferedInputStream(new FileInputStream(fastaDoc), 1024*1024)), 1024*1024)); } else { // plain return FASTAReader.getInstance(new BufferedInputStream(new FileInputStream(fastaDoc), 1024*1024)); } }
/**
 * Writes a random subset of the entries of a FASTA file to another file.
 *
 * <p>The input is read twice: once to count its entries and once to copy the
 * selected ones. Exactly {@code nrSequences} distinct entries are selected
 * uniformly at random; if the file holds fewer entries, all of them are
 * written, and a non-positive {@code nrSequences} yields an empty output
 * file. Every selected entry is written as its header line followed by its
 * sequence on the next line.
 *
 * <p>Failures are reported on standard error via {@code printStackTrace()};
 * no exception is propagated to the caller.
 *
 * @param fastaFileLocation path of the FASTA file to sample from
 * @param subsetLocation    path of the file that receives the sampled entries
 * @param nrSequences       number of entries to sample
 */
public static void subSample(String fastaFileLocation, String subsetLocation, int nrSequences) {
    FASTAReader fastaReader = null;
    PrintWriter subsetFileWriter = null;
    try {
        File fastaFile = new File(fastaFileLocation);
        File subset = new File(subsetLocation);

        // first pass: count the entries
        int countNrSequences = 0;
        fastaReader = FASTAReader.getInstance(fastaFile);
        while (fastaReader.readNext() != null) {
            countNrSequences++;
        }
        fastaReader.close();
        fastaReader = null;

        // never ask for more distinct indices than exist, otherwise the loop below cannot end
        int sampleSize = Math.max(0, Math.min(nrSequences, countNrSequences));

        // draw distinct 0-based entry indices, matching the 0-based counter of the second pass
        Set<Integer> selected = new HashSet<Integer>();
        Random rand = new Random();
        while (selected.size() < sampleSize) {
            selected.add(rand.nextInt(countNrSequences));
        }

        // second pass: copy the selected entries
        int entryIndex = 0;
        fastaReader = FASTAReader.getInstance(fastaFile);
        subsetFileWriter = new PrintWriter(subset);
        FASTAEntry fastaEntry;
        while ((fastaEntry = fastaReader.readNext()) != null) {
            if (selected.contains(entryIndex++)) {
                subsetFileWriter.println(fastaEntry.getHeaderLine());
                subsetFileWriter.println(fastaEntry.getSequence());
            }
        }

        // close (and thereby flush) before announcing success
        subsetFileWriter.close();
        System.out.println("File written");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (subsetFileWriter != null) {
            subsetFileWriter.close(); // closing a PrintWriter twice is harmless
        }
        if (fastaReader != null) {
            try {
                fastaReader.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
public static void export(Cluster cluster, OutputStream output) { List<String> leafIds = cluster.getTree().getLeavesAsString(cluster.getRoot()); PrintWriter printWriter = new PrintWriter(output); try { FASTAReader fastaReader = FASTAReader.getInstance(Settings.getInstance().getAlignmentLocation()); FASTAEntry entry = fastaReader.readNext(); while( entry != null ) { if(leafIds.contains(entry.getHeaderLine().substring(1))) { // System.out.println("Found: " + entry.getHeaderLine()); printWriter.write(entry.getHeaderLine() + "\n" + entry.getSequence() + "\n"); } // get the next entry in the FASTA file entry = fastaReader.readNext(); } } catch (Exception e) { e.printStackTrace(); } }
public static void main(String[] args) { try { // FASTAReader fastaReader = FASTAReader.getInstance(new File("/Users/ewout/Documents/phylogeo/EUResist_POL/alignment.ninja.noSDRM.fasta")); // BufferedReader bufferedReader = new BufferedReader(new FileReader( // "/Users/ewout/Documents/phylogeo/EUResist_POL/constraint_tree/constraint_tree.csv")); // BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter( // "/Users/ewout/Documents/phylogeo/EUResist_POL/constraint_tree/constraint_tree.fasta")); FASTAReader fastaReader = FASTAReader.getInstance(new File("/Users/ewout/Documents/phylogeo/EUResist_POL/alignment.ninja.noSDRM.fasta")); BufferedReader bufferedReader = new BufferedReader(new FileReader( "/Users/ewout/Documents/phylogeo/EUResist_POL/subtyping_attempt2/result.cut.sorted.csv")); BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter( "/Users/ewout/Documents/phylogeo/EUResist_POL/constraint_tree/full_constraint_tree.txt")); String line = ""; // List<String> temp = Arrays.asList("18726", "18748", "18771", "18848", "18909", "18957", "146", "157", "12226", "12228", "4654", "5745", "6363", // "6436", "8673", "8750", "9070", "10492", "13283", "15245", "4807", "4837", "7239", "9109", "11315", "12981", "13632", "14775", "20587", // "20930", "14", "15", "16", "17", "18", "19", "20", "20938", "32049", "6", "85", "86", "291", "1063", "3770", "4056", "6950", "10044", // "18520", "32015", "34098", "34187", "34974", "35373", "37101", "37739", "301", "4297", "4652", "20929", "18663", "20656", "33620", "33678", // "37914", "40063", "41194", "41354", "12", "1091", "1125", "41132", "17894", "17919", "17950", "17973", "24668", "24691", "25696", "31677"); List<String> temp = new ArrayList<String>(); Map<String, Set<String>> subtypes = new HashMap<String, Set<String>>(); FASTAEntry fastaEntry = null; while ((fastaEntry = fastaReader.readNext()) != null) { temp.add(fastaEntry.getHeaderLine().replaceAll("\\s+", "").substring(1)); } String subtype = ""; String id 
= ""; while ((line = bufferedReader.readLine()) != null) { id = line.split(",")[0]; subtype = line.split(",")[1]; if (!subtypes.containsKey(subtype)) { subtypes.put(subtype, new HashSet<String>()); } if(temp.contains(id)) { subtypes.get(subtype).add(id); } } // Queue<String> partTree = new PriorityQueue<String>(); StringBuilder stringBuilder = new StringBuilder("(("); // System.out.print("("); for (Entry<String, Set<String>> entry : subtypes.entrySet()) { Set<String> queue = entry.getValue(); if(queue.size() > 1) { Iterator<String> iterator = queue.iterator(); while (iterator.hasNext()) { String id2 = iterator.next(); stringBuilder.append(id2 + ","); } stringBuilder.setLength(stringBuilder.length() - 1); stringBuilder.append("),("); } else { if(!entry.getValue().isEmpty()) { stringBuilder.setLength(stringBuilder.length() - 1); stringBuilder.append(entry.getValue().iterator().next() + ",("); } } } stringBuilder.setLength(stringBuilder.length() - 2); stringBuilder.append(");"); bufferedWriter.write(stringBuilder.toString()); bufferedWriter.flush(); // while (!partTree.isEmpty() && (partTree.size() > 1)) { // String element1 = partTree.poll(); // String element2 = partTree.poll(); // partTree.add("(" + element1 + "," + element2 + ")"); // } // // System.out.println(partTree.poll()); // FASTAEntry fastaEntry = null; // while ((fastaEntry = fastaReader.readNext()) != null) { // if (temp.contains(fastaEntry.getHeaderLine().replaceAll("\\s+", "").substring(1))) { // bufferedWriter.write(fastaEntry.getHeaderLine() + "\n"); // bufferedWriter.write(fastaEntry.getSequence() + "\n"); // bufferedWriter.flush(); // } // } bufferedReader.close(); bufferedWriter.close(); } catch (Exception e) { e.printStackTrace(); } }
private boolean blastCheck(String alignmentLocation, String sequenceLocation) { File alignmentFile = new File(alignmentLocation); File sequenceFile = new File(sequenceLocation); FASTAReader fastaReader = null; FASTAEntry fastaEntry = null; try { fastaReader = FASTAReader.getInstance(sequenceFile); fastaEntry = fastaReader.readNext(); } catch (Exception e1) { e1.printStackTrace(); } AlignmentImpl alignmentImpl = new AlignmentImpl(AlignmentSequenceType.NT, alignmentFile); BlastSequenceImpl blastSequenceImpl = new BlastSequenceImpl(fastaEntry.getSequence().length(), sequenceFile); double cutoff = 50.0; String blastOptions = "-q -1"; try { Result result = new BlastAnalysis(alignmentImpl, cutoff, blastOptions).run(alignmentImpl, blastSequenceImpl, Files.createTempDir()); return result.haveSupport(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } return false; }
private boolean checkWTextArea(InputStream stream) { // We first need to make a copy of the input stream such that we can use it multiple times. ByteArrayOutputStream baos = new ByteArrayOutputStream(); byte[] bytes = null; try { org.apache.commons.io.IOUtils.copy(stream, baos); bytes = baos.toByteArray(); baos.close(); stream.close(); } catch (IOException e) { e.printStackTrace(); } // Check if the input is correct ByteArrayInputStream byteArrayInputStream = null; try { byteArrayInputStream = new ByteArrayInputStream(bytes); FASTAReader fastaReader = FASTAReader.getInstance(byteArrayInputStream); fastaReader.readNext(); } catch (Exception e) { return false; } finally { try { byteArrayInputStream.close(); } catch (IOException e) { e.printStackTrace(); } } try { byteArrayInputStream = new ByteArrayInputStream(bytes); FASTAReader fastaReader = FASTAReader.getInstance(byteArrayInputStream); FASTAEntry fastaEntry; int counter = 0; int maxSequences = 1; textArea.setText(""); while((fastaEntry = fastaReader.readNext()) != null) { counter++; if(counter <= maxSequences) { String textAreaText = textArea.getText(); if(counter > 1) { textAreaText = textAreaText + "\n"; } textArea.setText(textAreaText + fastaEntry.getHeaderLine().replaceAll(" ", "_") + "\n" + fastaEntry.getSequence()); } else { final WDialog wDialog = new WDialog("Warning"); WTable wTable = new WTable(wDialog.getContents()); wTable.addStyleClass("tablePPlacer", true); wTable.setHeaderCount(1); wTable.setWidth(new WLength("100%")); wTable.setStyleClass("tableDialog"); WImage wImage = new WImage(new WLink(Main.getApp().getServletContext().getContextPath().concat("/images/warning.png"))); wImage.setWidth(new WLength(50)); wImage.setHeight(new WLength(50)); wTable.getElementAt(1, 1).addWidget(wImage); wTable.getElementAt(1, 1).setRowSpan(2); wTable.getElementAt(1, 2).addWidget(new WText("Your fasta file contained more than one sequence.")); wTable.getElementAt(2, 2).addWidget(new WText("Only the first sequence 
will be used.")); WPushButton wPushButton = new WPushButton("OK"); wPushButton.setWidth(new WLength(75)); wPushButton.clicked().addListener(dialog, new Signal1.Listener<WMouseEvent>() { public void trigger(WMouseEvent e1) { wDialog.reject(); } }); wTable.getElementAt(3, 2).addWidget(wPushButton); wTable.getElementAt(3, 2).setContentAlignment(AlignmentFlag.AlignCenter); wDialog.setPopup(true); wDialog.rejectWhenEscapePressed(); wDialog.show(); return true; } } } catch (Exception e) { e.printStackTrace(); return false; } finally { try { stream.close(); byteArrayInputStream.close(); } catch (IOException e) { e.printStackTrace(); } } return true; }
/** * Get the results of the analysis back in the form used by proxl: * reported peptides are the keys, and all of the PSMs (and their scores) * that reported that peptide are the values. * * @param analysis * @return * @throws Exception */ public Map<IProphetReportedPeptide, Collection<IProphetResult>> getResultsFromAnalysis( IProphetAnalysis analysis ) throws Exception { Map<IProphetReportedPeptide, Collection<IProphetResult>> results = new HashMap<IProphetReportedPeptide, Collection<IProphetResult>>(); for( MsmsRunSummary runSummary : analysis.getAnalysis().getMsmsRunSummary() ) { for( SpectrumQuery spectrumQuery : runSummary.getSpectrumQuery() ) { for( SearchResult searchResult : spectrumQuery.getSearchResult() ) { for( SearchHit searchHit : searchResult.getSearchHit() ) { for( AnalysisResult analysisResult : searchHit.getAnalysisResult() ) { if( analysisResult.getAnalysis().equals( "interprophet" ) ) { // only one interprophet result will appear for a search hit, and we are only // interested in search hits with an interprophet result. // skip this if it's a decoy if( PepXMLUtils.isDecoy( analysis.getDecoyIdentifiers(), searchHit) ) continue; // get our result IProphetResult result = getResult( runSummary, spectrumQuery, searchHit ); // skip if the probability is 0 (another way to check for decoys) if( result.getInterProphetScore().compareTo( new BigDecimal( "0" ) ) == 0 ) continue; // get our reported peptide IProphetReportedPeptide reportedPeptide = getReportedPeptide( searchHit, analysis ); if( !results.containsKey( reportedPeptide ) ) results.put( reportedPeptide, new ArrayList<IProphetResult>() ); results.get( reportedPeptide ).add( result ); /* * Kojak reports leucine/isoleucine variations as individual peptide matches in its results * file as tied as rank 1 hits to a spectrum. This is preferred by proxl, however, peptideprophet * and iprophet only score a single rank 1 hit for a spectrum. 
If we only keep the peptide that * iprophet scored, we may lose valuable information if the leucine->isoleucine variant of that * peptide matched proteins of interest in the FASTA file. * * To address this, iterate over the other search hits for this search result, and keep all other * rank 1 hits that are merely leucine/isoleucine substitutions of the scored rank 1 hit. */ Collection<IProphetReportedPeptide> otherReportedPeptides = getAllLeucineIsoleucineSubstitutions( reportedPeptide, searchResult, analysis ); for( IProphetReportedPeptide otherReportedPeptide : otherReportedPeptides ) { if( !results.containsKey( otherReportedPeptide ) ) results.put( otherReportedPeptide, new ArrayList<IProphetResult>() ); results.get( otherReportedPeptide ).add( result ); } } } } } } } /* * Because it is impossible to know if a reported peptide only maps to decoys or not in peptideprophet results * (since it also lists all proteins that match leucine/isoleucine substitutions as protein hits for a peptide) * we need to confirm whether or not the reported peptides whose leucine/isoleucine substitutions matched * proteins in the FASTA file exclusively match to decoys or not. If they do, remove them. 
*/ Collection<IProphetReportedPeptide> reportedPeptidesToConfirm = new HashSet<>(); reportedPeptidesToConfirm.addAll( results.keySet() ); if( reportedPeptidesToConfirm.size() > 0 ) { // collection of all protein names we need to confirm Collection<String> proteinNames = new HashSet<>(); // cache the relevant protein sequences Map<String, String> proteinSequences = new HashMap<>(); for( IProphetReportedPeptide reportedPeptide : reportedPeptidesToConfirm ) { proteinNames.addAll( reportedPeptide.getPeptide1().getTargetProteins() ); if( reportedPeptide.getPeptide2() != null ) proteinNames.addAll( reportedPeptide.getPeptide2().getTargetProteins() ); } // build the cache of protein sequences FASTAReader reader = null; try { reader = FASTAReader.getInstance( analysis.getFastaFile() ); FASTAEntry entry = reader.readNext(); while( entry != null ) { for( FASTAHeader header : entry.getHeaders() ) { for( String testString : proteinNames ) { if( header.getName().startsWith( testString ) ) { proteinSequences.put( header.getName(), entry.getSequence() ); } } } entry = reader.readNext(); } } finally { if( reader != null ){ reader.close(); reader = null; } } // now have cache of relevant protein names and sequences. 
iterate over the reportedPeptidesToConfirm and // remove associated proteins from peptides where that peptide is not actually found in that protein for( IProphetReportedPeptide reportedPeptide : reportedPeptidesToConfirm ) { for (Iterator<String> i = reportedPeptide.getPeptide1().getTargetProteins().iterator(); i.hasNext();) { String protein = i.next(); boolean foundProtein = false; for( String cachedProteinName : proteinSequences.keySet() ) { if( cachedProteinName.startsWith( protein ) ) { if( proteinSequences.get( cachedProteinName ).toLowerCase().contains( reportedPeptide.getPeptide1().getSequence().toLowerCase() ) ) foundProtein = true; } } if( !foundProtein ) i.remove(); } if( reportedPeptide.getType() == IProphetConstants.LINK_TYPE_CROSSLINK ) { for (Iterator<String> i = reportedPeptide.getPeptide2().getTargetProteins().iterator(); i.hasNext();) { String protein = i.next(); boolean foundProtein = false; for( String cachedProteinName : proteinSequences.keySet() ) { if( cachedProteinName.startsWith( protein ) ) { if( proteinSequences.get( cachedProteinName ).toLowerCase().contains( reportedPeptide.getPeptide2().getSequence().toLowerCase() ) ) foundProtein = true; } } if( !foundProtein ) i.remove(); } } } // now we can iterate over the reportedPeptidesToConfirm and remove any from our results where there are 0 // targetProteins left for a peptide for( IProphetReportedPeptide reportedPeptide : reportedPeptidesToConfirm ) { if( reportedPeptide.getPeptide1().getTargetProteins().size() < 1 ) { System.out.println( "INFO: Removing " + reportedPeptide + " from results, does not match a target protein." ); results.remove( reportedPeptide ); } else if( reportedPeptide.getType() == IProphetConstants.LINK_TYPE_CROSSLINK && reportedPeptide.getPeptide2().getTargetProteins().size() < 1) { System.out.println( "INFO: Removing " + reportedPeptide + " from results, does not match a target protein." ); results.remove( reportedPeptide ); } } } return results; }
/** * Build and put in the MatchedProteins element in the XML document. * * @param proxlInput * @param proteinNames * @param peptides * @param fastaFile * @throws Exception */ private void buildMatchedProteinsElement( ProxlInput proxlInput, Collection<String> proteinNames, Collection<String> peptides, File fastaFile, Collection<String> decoyStrings ) throws Exception { Collection<String> sequences = new HashSet<>(); MatchedProteins xmlMatchedProteins = new MatchedProteins(); proxlInput.setMatchedProteins( xmlMatchedProteins ); // iterate over FASTA file, add entries for proteins IDed in the search FASTAReader reader = null; try { reader = FASTAReader.getInstance( fastaFile ); FASTAEntry entry = reader.readNext(); while( entry != null ) { // if this is a decoy entry, skip it if( isDecoyFastaEntry( entry, decoyStrings ) ) { // get the next entry in the FASTA file entry = reader.readNext(); continue; } boolean includeThisEntry = false; for( FASTAHeader header : entry.getHeaders() ) { for( String proteinName : proteinNames ) { // using startsWith instead of equals, since names in the results // may be truncated. 
if( header.getName().startsWith( proteinName ) ) { includeThisEntry = true; break; } } if( includeThisEntry ) break; } if( includeThisEntry ) { Protein xmlProtein = new Protein(); xmlMatchedProteins.getProtein().add( xmlProtein ); xmlProtein.setSequence( entry.getSequence() ); sequences.add( entry.getSequence() ); for( FASTAHeader header : entry.getHeaders() ) { ProteinAnnotation xmlProteinAnnotation = new ProteinAnnotation(); xmlProtein.getProteinAnnotation().add( xmlProteinAnnotation ); if( header.getDescription() != null ) xmlProteinAnnotation.setDescription( header.getDescription() ); xmlProteinAnnotation.setName( header.getName() ); Integer taxId = GetTaxonomyId.getInstance().getTaxonomyId( header.getName(), header.getDescription() ); if( taxId != null ) xmlProteinAnnotation.setNcbiTaxonomyId( BigInteger.valueOf( taxId ) ); } } // get the next entry in the FASTA file entry = reader.readNext(); } } finally { if( reader != null ) { reader.close(); reader = null; } } // ensure each peptides if found in at least one of the matched proteins' sequences for( String peptide : peptides ) { boolean found = false; for( String protein : sequences ) { if( protein.toLowerCase().contains( peptide.toLowerCase() ) ) { found = true; break; } } if( !found ) throw new Exception( "Could not find peptide sequence (" + peptide + ") in any matched protein..." ); } }
/**
 * Builds a map from each distinct non-decoy protein sequence in the given
 * FASTA file to the collection of annotations (name, description and, when
 * available, taxonomy id) of the headers that carry that sequence.
 *
 * <p>NOTE(review): {@code allPetpideSequences} is not consulted by this
 * implementation, so the entries are not restricted to proteins containing
 * one of the supplied peptides - confirm whether that filter is intended.
 *
 * @param allPetpideSequences the peptide sequences of interest (currently unused here)
 * @param fastaFile           the FASTA file to read
 * @param decoyIdentifiers    identifiers passed to {@code isDecoyFastaEntry} to recognise decoy entries
 * @return a map from protein sequence to the annotations found for that sequence
 * @throws Exception if the FASTA file cannot be read
 */
private Map<String, Collection<FastaProteinAnnotation>> getProteins( Collection<String> allPetpideSequences, File fastaFile, Collection<String> decoyIdentifiers ) throws Exception {

    Map<String, Collection<FastaProteinAnnotation>> proteinAnnotations = new HashMap<>();

    FASTAReader fastaReader = null;
    try {
        fastaReader = FASTAReader.getInstance( fastaFile );

        FASTAEntry entry = fastaReader.readNext();
        while( entry != null ) {

            if( !isDecoyFastaEntry( entry, decoyIdentifiers ) ) {

                for( FASTAHeader header : entry.getHeaders() ) {

                    // the bucket for a sequence is created on its first header only
                    Collection<FastaProteinAnnotation> annotationsForSequence = proteinAnnotations.get( entry.getSequence() );
                    if( annotationsForSequence == null ) {
                        annotationsForSequence = new HashSet<FastaProteinAnnotation>();
                        proteinAnnotations.put( entry.getSequence(), annotationsForSequence );
                    }

                    FastaProteinAnnotation anno = new FastaProteinAnnotation();
                    anno.setName( header.getName() );
                    anno.setDescription( header.getDescription() );

                    Integer taxId = GetTaxonomyId.getInstance().getTaxonomyId( header.getName(), header.getDescription() );
                    if( taxId != null ) {
                        anno.setTaxonomId( taxId );
                    }

                    annotationsForSequence.add( anno );
                }
            }

            entry = fastaReader.readNext();
        }

    } finally {
        if( fastaReader != null ) {
            fastaReader.close();
            fastaReader = null;
        }
    }

    return proteinAnnotations;
}
Usage snippet has been bookmarked! Review your bookmarks
Thank you! Review your likes