Usages of

org.yeastrc.fasta.FASTAReader.close()
public synchronized void index(File fastaDoc, File taxonDoc) throws Exception { if(fastaDoc == null) { throw new IllegalArgumentException("fastaDoc is null"); } String taxonTree = ""; if(taxonDoc != null && taxonDoc.exists()) { FileReader reader = new FileReader(taxonDoc); taxonTree = IOUtils.toString(reader); IOUtils.closeQuietly(reader); } FASTAReader reader = FastaFileReader.getFASTAReader(fastaDoc); FASTAEntry read = null; while((read = reader.readNext()) != null) { String headerLine = read.getHeaderLine(); if(headerLine.startsWith(">")) { headerLine = headerLine.substring(1); } final String f_filename = fastaDoc.getName(); final String sequence = read.getSequence(); final String header = headerLine; final String f_taxonTree = taxonTree; final boolean f_minStrandKmer = this.minStrandKmer; Runnable worker = new Runnable() { @Override public void run() { try { Document doc = freeQueue.poll(); if(doc == null) { doc = new Document(); Field filenameField = new StringField(IndexConstants.FIELD_FILENAME, "", Field.Store.YES); Field headerField = new StringField(IndexConstants.FIELD_HEADER, "", Field.Store.YES); Field sequenceDirectionField = new StringField(IndexConstants.FIELD_SEQUENCE_DIRECTION, "", Field.Store.YES); Field taxonTreeField = new StringField(IndexConstants.FIELD_TAXONOMY_TREE, "", Field.Store.YES); Field sequenceField = new TextField(IndexConstants.FIELD_SEQUENCE, "", Field.Store.NO); doc.add(filenameField); doc.add(headerField); doc.add(sequenceDirectionField); doc.add(taxonTreeField); doc.add(sequenceField); } StringField filenameField = (StringField) doc.getField(IndexConstants.FIELD_FILENAME); StringField headerField = (StringField) doc.getField(IndexConstants.FIELD_HEADER); StringField sequenceDirectionField = (StringField) doc.getField(IndexConstants.FIELD_SEQUENCE_DIRECTION); StringField taxonTreeField = (StringField) doc.getField(IndexConstants.FIELD_TAXONOMY_TREE); TextField sequenceField = (TextField) 
doc.getField(IndexConstants.FIELD_SEQUENCE); filenameField.setStringValue(f_filename); headerField.setStringValue(header); taxonTreeField.setStringValue(f_taxonTree); if(f_minStrandKmer) { // min-strand sequenceDirectionField.setStringValue("min_strand"); sequenceField.setStringValue(sequence); indexWriter.addDocument(doc); } else { // forward-strand sequenceDirectionField.setStringValue("forward"); sequenceField.setStringValue(sequence); indexWriter.addDocument(doc); // reverse-strand sequenceDirectionField.setStringValue("reverse"); sequenceField.setStringValue(SequenceHelper.getReverseComplement(sequence)); indexWriter.addDocument(doc); } freeQueue.offer(doc); } catch (Exception ex) { LOG.error("Exception occurred during index construction", ex); } } }; this.executor.execute(worker); } reader.close(); }
/**
 * Classify every sequence in the input FASTA file, writing one JSON result per line
 * to classifyOutput and, optionally, a summary to summaryOutput. Classification is
 * parallelized over this.conf.getWorkerThreads() threads via a BlockingExecutor.
 *
 * Fix: executor shutdown and closing of the writer and reader now happen in a
 * finally block, so file handles are released even when readNext() throws.
 *
 * @param inputFasta     FASTA file of query sequences (required)
 * @param classifyOutput destination for per-sequence JSON results (required)
 * @param summaryOutput  optional destination for the run summary; skipped when null
 * @throws IllegalArgumentException if inputFasta or classifyOutput is null
 * @throws Exception on read, write, or classification-setup failure
 */
public void classify(File inputFasta, File classifyOutput, File summaryOutput) throws Exception {
    if(inputFasta == null) {
        throw new IllegalArgumentException("inputFasta is null");
    }
    if(classifyOutput == null) {
        throw new IllegalArgumentException("classifyOutput is null");
    }
    if(!classifyOutput.getParentFile().exists()) {
        classifyOutput.getParentFile().mkdirs();
    }
    if(summaryOutput != null) {
        if(!summaryOutput.getParentFile().exists()) {
            summaryOutput.getParentFile().mkdirs();
        }
    }

    FASTAReader reader = FastaFileReader.getFASTAReader(inputFasta);
    FileWriter fw = new FileWriter(classifyOutput, false);
    final BufferedWriter bw = new BufferedWriter(fw, 1024*1024);

    final ClassificationResultSummary summary = new ClassificationResultSummary();
    summary.setQueryFilename(inputFasta.getName());
    summary.setStartTime(new Date());

    int threads = this.conf.getWorkerThreads();
    BlockingExecutor executor = new BlockingExecutor(threads, threads * 2);
    try {
        FASTAEntry read = null;
        while((read = reader.readNext()) != null) {
            final String sequence = read.getSequence();
            final String header = read.getHeaderLine();
            Runnable worker = new Runnable() {
                @Override
                public void run() {
                    try {
                        ClassificationResult result = classifier.classify(header, sequence);
                        JsonSerializer serializer = new JsonSerializer();
                        String json = serializer.toJson(result);
                        // summary and bw are shared across workers; guard each separately.
                        synchronized(summary) {
                            summary.report(result);
                        }
                        synchronized(bw) {
                            bw.write(json + "\n");
                        }
                    } catch (Exception ex) {
                        LOG.error("Exception occurred during search", ex);
                    }
                }
            };
            executor.execute(worker);
        }
    } finally {
        // Drain all pending workers before closing the shared writer, then
        // release both file handles — on failure paths as well as success.
        executor.shutdown();
        executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        bw.close();
        reader.close();
    }

    summary.setEndTime(new Date());
    LOG.info("classifying " + summary.getQueryFilename() + " finished in " + summary.getTimeTaken() + " millisec");

    if(summaryOutput != null) {
        summary.saveTo(summaryOutput);
    }
}
/**
 * Classify every sequence in the input FASTA file, writing one JSON result per line
 * to classifyOutput and, optionally, a summary to summaryOutput. Classification is
 * parallelized over this.conf.getWorkerThreads() threads via a BlockingExecutor.
 *
 * Fix: executor shutdown and closing of the writer and reader now happen in a
 * finally block, so file handles are released even when readNext() throws.
 *
 * @param inputFasta     FASTA file of query sequences (required)
 * @param classifyOutput destination for per-sequence JSON results (required)
 * @param summaryOutput  optional destination for the run summary; skipped when null
 * @throws IllegalArgumentException if inputFasta or classifyOutput is null
 * @throws Exception on read, write, or classification-setup failure
 */
public void classify(File inputFasta, File classifyOutput, File summaryOutput) throws Exception {
    if(inputFasta == null) {
        throw new IllegalArgumentException("inputFasta is null");
    }
    if(classifyOutput == null) {
        throw new IllegalArgumentException("classifyOutput is null");
    }
    if(!classifyOutput.getParentFile().exists()) {
        classifyOutput.getParentFile().mkdirs();
    }
    if(summaryOutput != null) {
        if(!summaryOutput.getParentFile().exists()) {
            summaryOutput.getParentFile().mkdirs();
        }
    }

    FASTAReader reader = FastaFileReader.getFASTAReader(inputFasta);
    FileWriter fw = new FileWriter(classifyOutput, false);
    final BufferedWriter bw = new BufferedWriter(fw, 1024*1024);

    final ClassificationResultSummary summary = new ClassificationResultSummary();
    summary.setQueryFilename(inputFasta.getName());
    summary.setStartTime(new Date());

    int threads = this.conf.getWorkerThreads();
    BlockingExecutor executor = new BlockingExecutor(threads, threads * 2);
    try {
        FASTAEntry read = null;
        while((read = reader.readNext()) != null) {
            final String sequence = read.getSequence();
            final String header = read.getHeaderLine();
            Runnable worker = new Runnable() {
                @Override
                public void run() {
                    try {
                        ClassificationResult result = classifier.classify(header, sequence);
                        JsonSerializer serializer = new JsonSerializer();
                        String json = serializer.toJson(result);
                        // summary and bw are shared across workers; guard each separately.
                        synchronized(summary) {
                            summary.report(result);
                        }
                        synchronized(bw) {
                            bw.write(json + "\n");
                        }
                    } catch (Exception ex) {
                        LOG.error("Exception occurred during search", ex);
                    }
                }
            };
            executor.execute(worker);
        }
    } finally {
        // Drain all pending workers before closing the shared writer, then
        // release both file handles — on failure paths as well as success.
        executor.shutdown();
        executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        bw.close();
        reader.close();
    }

    summary.setEndTime(new Date());
    LOG.info("classifying " + summary.getQueryFilename() + " finished in " + summary.getTimeTaken() + " millisec");

    if(summaryOutput != null) {
        summary.saveTo(summaryOutput);
    }
}
/**
 * Write a random subset of nrSequences entries from a FASTA file to subsetLocation.
 * First pass counts the entries; second pass copies the randomly chosen ones.
 *
 * Fixes over the original:
 * - The random line numbers were 1-based (rand.nextInt(count) + 1) while the
 *   selection counter was 0-based, so the first entry could never be chosen and
 *   one drawn value was always out of range. Both are now 0-based.
 * - The sampling loop used "size() <= nrSequences", selecting one entry too many.
 * - A new Random was constructed on every iteration; one instance is now reused.
 * - Requesting more sequences than the file contains no longer loops forever;
 *   the sample size is clamped to the file's entry count.
 * - Readers and the writer are closed in finally blocks.
 *
 * @param fastaFileLocation path of the FASTA file to sample from
 * @param subsetLocation    path of the output FASTA subset
 * @param nrSequences       number of entries to sample (clamped to the file size)
 */
public static void subSample(String fastaFileLocation, String subsetLocation, int nrSequences) {
    try {
        File fastaFile = new File(fastaFileLocation);
        File subset = new File(subsetLocation);

        // Pass 1: count the entries in the file.
        int countNrSequences = 0;
        FASTAReader fastaReader = FASTAReader.getInstance(fastaFile);
        try {
            while(fastaReader.readNext() != null) {
                countNrSequences++;
            }
        } finally {
            fastaReader.close();
        }

        // Draw distinct 0-based entry indices; clamp so we cannot loop forever.
        int sampleSize = Math.min(nrSequences, countNrSequences);
        Random rand = new Random();
        Set<Integer> lines = new HashSet<Integer>();
        while(lines.size() < sampleSize) {
            lines.add(rand.nextInt(countNrSequences));
        }

        // Pass 2: copy the selected entries.
        int lineNr = 0;
        fastaReader = FASTAReader.getInstance(fastaFile);
        PrintWriter subsetFileWriter = new PrintWriter(subset);
        try {
            FASTAEntry fastaEntry;
            while((fastaEntry = fastaReader.readNext()) != null) {
                if(lines.contains(lineNr++)) {
                    subsetFileWriter.println(fastaEntry.getHeaderLine());
                    subsetFileWriter.println(fastaEntry.getSequence());
                }
            }
        } finally {
            subsetFileWriter.close();
            fastaReader.close();
        }
        System.out.println("File written");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Get the results of the analysis back in the form used by proxl:
 * reported peptides are the keys, and all of the PSMs (and their scores)
 * that reported that peptide are the values.
 *
 * @param analysis the parsed iProphet/pepXML analysis to harvest results from
 * @return map of reported peptide to the PSM results that reported it
 * @throws Exception on FASTA read failure or malformed analysis data
 */
public Map<IProphetReportedPeptide, Collection<IProphetResult>> getResultsFromAnalysis( IProphetAnalysis analysis ) throws Exception {

    Map<IProphetReportedPeptide, Collection<IProphetResult>> results = new HashMap<IProphetReportedPeptide, Collection<IProphetResult>>();

    // Walk the pepXML hierarchy: run summary -> spectrum query -> search result
    // -> search hit -> analysis result, collecting only interprophet-scored hits.
    for( MsmsRunSummary runSummary : analysis.getAnalysis().getMsmsRunSummary() ) {
        for( SpectrumQuery spectrumQuery : runSummary.getSpectrumQuery() ) {
            for( SearchResult searchResult : spectrumQuery.getSearchResult() ) {
                for( SearchHit searchHit : searchResult.getSearchHit() ) {
                    for( AnalysisResult analysisResult : searchHit.getAnalysisResult() ) {
                        if( analysisResult.getAnalysis().equals( "interprophet" ) ) {

                            // only one interprophet result will appear for a search hit, and we are only
                            // interested in search hits with an interprophet result.

                            // skip this if it's a decoy
                            if( PepXMLUtils.isDecoy( analysis.getDecoyIdentifiers(), searchHit) )
                                continue;

                            // get our result
                            IProphetResult result = getResult( runSummary, spectrumQuery, searchHit );

                            // skip if the probability is 0 (another way to check for decoys)
                            if( result.getInterProphetScore().compareTo( new BigDecimal( "0" ) ) == 0 )
                                continue;

                            // get our reported peptide
                            IProphetReportedPeptide reportedPeptide = getReportedPeptide( searchHit, analysis );

                            if( !results.containsKey( reportedPeptide ) )
                                results.put( reportedPeptide, new ArrayList<IProphetResult>() );

                            results.get( reportedPeptide ).add( result );

                            /*
                             * Kojak reports leucine/isoleucine variations as individual peptide matches in its results
                             * file as tied as rank 1 hits to a spectrum. This is preferred by proxl, however, peptideprophet
                             * and iprophet only score a single rank 1 hit for a spectrum. If we only keep the peptide that
                             * iprophet scored, we may lose valuable information if the leucine->isoleucine variant of that
                             * peptide matched proteins of interest in the FASTA file.
                             *
                             * To address this, iterate over the other search hits for this search result, and keep all other
                             * rank 1 hits that are merely leucine/isoleucine substitutions of the scored rank 1 hit.
                             */
                            Collection<IProphetReportedPeptide> otherReportedPeptides = getAllLeucineIsoleucineSubstitutions( reportedPeptide, searchResult, analysis );

                            // The L/I variants share the same scored result object.
                            for( IProphetReportedPeptide otherReportedPeptide : otherReportedPeptides ) {

                                if( !results.containsKey( otherReportedPeptide ) )
                                    results.put( otherReportedPeptide, new ArrayList<IProphetResult>() );

                                results.get( otherReportedPeptide ).add( result );
                            }
                        }
                    }
                }
            }
        }
    }

    /*
     * Because it is impossible to know if a reported peptide only maps to decoys or not in peptideprophet results
     * (since it also lists all proteins that match leucine/isoleucine substitutions as protein hits for a peptide)
     * we need to confirm whether or not the reported peptides whose leucine/isoleucine substitutions matched
     * proteins in the FASTA file exclusively match to decoys or not. If they do, remove them.
     */
    Collection<IProphetReportedPeptide> reportedPeptidesToConfirm = new HashSet<>();
    reportedPeptidesToConfirm.addAll( results.keySet() );

    if( reportedPeptidesToConfirm.size() > 0 ) {

        // collection of all protein names we need to confirm
        Collection<String> proteinNames = new HashSet<>();

        // cache the relevant protein sequences
        Map<String, String> proteinSequences = new HashMap<>();

        for( IProphetReportedPeptide reportedPeptide : reportedPeptidesToConfirm ) {
            proteinNames.addAll( reportedPeptide.getPeptide1().getTargetProteins() );
            if( reportedPeptide.getPeptide2() != null )
                proteinNames.addAll( reportedPeptide.getPeptide2().getTargetProteins() );
        }

        // build the cache of protein sequences
        FASTAReader reader = null;
        try {
            reader = FASTAReader.getInstance( analysis.getFastaFile() );

            FASTAEntry entry = reader.readNext();
            while( entry != null ) {
                for( FASTAHeader header : entry.getHeaders() ) {
                    // startsWith (not equals) because names in the results may be truncated
                    for( String testString : proteinNames ) {
                        if( header.getName().startsWith( testString ) ) {
                            proteinSequences.put( header.getName(), entry.getSequence() );
                        }
                    }
                }
                entry = reader.readNext();
            }
        } finally {
            if( reader != null ){
                reader.close();
                reader = null;
            }
        }

        // now have cache of relevant protein names and sequences. iterate over the reportedPeptidesToConfirm and
        // remove associated proteins from peptides where that peptide is not actually found in that protein
        //
        // NOTE(review): the peptides being mutated here are also keys of the `results`
        // HashMap — safe only if targetProteins does not contribute to hashCode/equals;
        // confirm against the IProphetReportedPeptide implementation.
        for( IProphetReportedPeptide reportedPeptide : reportedPeptidesToConfirm ) {

            for (Iterator<String> i = reportedPeptide.getPeptide1().getTargetProteins().iterator(); i.hasNext();) {
                String protein = i.next();
                boolean foundProtein = false;

                for( String cachedProteinName : proteinSequences.keySet() ) {
                    if( cachedProteinName.startsWith( protein ) ) {
                        // case-insensitive containment check of peptide in protein sequence
                        if( proteinSequences.get( cachedProteinName ).toLowerCase().contains( reportedPeptide.getPeptide1().getSequence().toLowerCase() ) )
                            foundProtein = true;
                    }
                }

                if( !foundProtein )
                    i.remove();
            }

            if( reportedPeptide.getType() == IProphetConstants.LINK_TYPE_CROSSLINK ) {
                // crosslinks carry a second peptide; confirm its proteins the same way
                for (Iterator<String> i = reportedPeptide.getPeptide2().getTargetProteins().iterator(); i.hasNext();) {
                    String protein = i.next();
                    boolean foundProtein = false;

                    for( String cachedProteinName : proteinSequences.keySet() ) {
                        if( cachedProteinName.startsWith( protein ) ) {
                            if( proteinSequences.get( cachedProteinName ).toLowerCase().contains( reportedPeptide.getPeptide2().getSequence().toLowerCase() ) )
                                foundProtein = true;
                        }
                    }

                    if( !foundProtein )
                        i.remove();
                }
            }
        }

        // now we can iterate over the reportedPeptidesToConfirm and remove any from our results where there are 0
        // targetProteins left for a peptide
        for( IProphetReportedPeptide reportedPeptide : reportedPeptidesToConfirm ) {
            if( reportedPeptide.getPeptide1().getTargetProteins().size() < 1 ) {
                System.out.println( "INFO: Removing " + reportedPeptide + " from results, does not match a target protein." );
                results.remove( reportedPeptide );
            } else if( reportedPeptide.getType() == IProphetConstants.LINK_TYPE_CROSSLINK && reportedPeptide.getPeptide2().getTargetProteins().size() < 1) {
                System.out.println( "INFO: Removing " + reportedPeptide + " from results, does not match a target protein." );
                results.remove( reportedPeptide );
            }
        }
    }

    return results;
}
/**
 * Build and put in the MatchedProteins element in the XML document.
 *
 * Scans the FASTA file, skips decoy entries, and adds a Protein element (with one
 * annotation per header) for every entry whose header name matches a protein
 * identified in the search. Finally verifies every peptide is contained in at least
 * one matched protein sequence, throwing otherwise.
 *
 * @param proxlInput   XML root to attach the MatchedProteins element to
 * @param proteinNames protein names IDed in the search (possibly truncated — matched via startsWith)
 * @param peptides     peptide sequences that must each appear in some matched protein
 * @param fastaFile    FASTA file to scan
 * @param decoyStrings identifiers marking decoy entries to skip
 * @throws Exception if a peptide matches no protein, or on FASTA read failure
 */
private void buildMatchedProteinsElement( ProxlInput proxlInput, Collection<String> proteinNames, Collection<String> peptides, File fastaFile, Collection<String> decoyStrings ) throws Exception {

    // sequences of all proteins added, used for the peptide sanity check at the end
    Collection<String> sequences = new HashSet<>();

    MatchedProteins xmlMatchedProteins = new MatchedProteins();
    proxlInput.setMatchedProteins( xmlMatchedProteins );

    // iterate over FASTA file, add entries for proteins IDed in the search
    FASTAReader reader = null;
    try {
        reader = FASTAReader.getInstance( fastaFile );

        FASTAEntry entry = reader.readNext();
        while( entry != null ) {

            // if this is a decoy entry, skip it
            if( isDecoyFastaEntry( entry, decoyStrings ) ) {
                // get the next entry in the FASTA file
                entry = reader.readNext();
                continue;
            }

            // include the entry if ANY of its headers matches ANY searched protein name
            boolean includeThisEntry = false;
            for( FASTAHeader header : entry.getHeaders() ) {
                for( String proteinName : proteinNames ) {
                    // using startsWith instead of equals, since names in the results
                    // may be truncated.
                    if( header.getName().startsWith( proteinName ) ) {
                        includeThisEntry = true;
                        break;
                    }
                }
                if( includeThisEntry )
                    break;
            }

            if( includeThisEntry ) {
                Protein xmlProtein = new Protein();
                xmlMatchedProteins.getProtein().add( xmlProtein );
                xmlProtein.setSequence( entry.getSequence() );
                sequences.add( entry.getSequence() );

                // one ProteinAnnotation per FASTA header on this entry
                for( FASTAHeader header : entry.getHeaders() ) {
                    ProteinAnnotation xmlProteinAnnotation = new ProteinAnnotation();
                    xmlProtein.getProteinAnnotation().add( xmlProteinAnnotation );

                    if( header.getDescription() != null )
                        xmlProteinAnnotation.setDescription( header.getDescription() );

                    xmlProteinAnnotation.setName( header.getName() );

                    // taxonomy id is optional; only set when resolvable
                    Integer taxId = GetTaxonomyId.getInstance().getTaxonomyId( header.getName(), header.getDescription() );
                    if( taxId != null )
                        xmlProteinAnnotation.setNcbiTaxonomyId( BigInteger.valueOf( taxId ) );
                }
            }

            // get the next entry in the FASTA file
            entry = reader.readNext();
        }
    } finally {
        if( reader != null ) {
            reader.close();
            reader = null;
        }
    }

    // ensure each peptides if found in at least one of the matched proteins' sequences
    // (case-insensitive containment)
    for( String peptide : peptides ) {
        boolean found = false;
        for( String protein : sequences ) {
            if( protein.toLowerCase().contains( peptide.toLowerCase() ) ) {
                found = true;
                break;
            }
        }
        if( !found )
            throw new Exception( "Could not find peptide sequence (" + peptide + ") in any matched protein..." );
    }
}
/**
 * Get a map of the distinct target protein sequences mapped to a collection of target
 * annotations for that sequence from the given fasta file.
 *
 * NOTE(review): despite the original javadoc's claim, allPetpideSequences is never
 * consulted in this method — every non-decoy FASTA entry is returned. Confirm whether
 * filtering by peptide containment was intended before relying on the doc.
 *
 * @param allPetpideSequences peptide sequences of interest (currently unused — see note)
 * @param fastaFile FASTA file to read
 * @param decoyIdentifiers identifiers marking decoy entries to skip
 * @return map of protein sequence to the annotations (name/description/taxonomy id) found for it
 * @throws Exception on FASTA read failure
 */
private Map<String, Collection<FastaProteinAnnotation>> getProteins( Collection<String> allPetpideSequences, File fastaFile, Collection<String> decoyIdentifiers ) throws Exception {

    Map<String, Collection<FastaProteinAnnotation>> proteinAnnotations = new HashMap<>();

    FASTAReader fastaReader = null;
    try {
        fastaReader = FASTAReader.getInstance( fastaFile );

        for( FASTAEntry entry = fastaReader.readNext(); entry != null; entry = fastaReader.readNext() ) {

            // skip decoy entries entirely
            if( isDecoyFastaEntry( entry, decoyIdentifiers ) )
                continue;

            // one annotation per header; all headers of an entry share its sequence
            for( FASTAHeader header : entry.getHeaders() ) {

                FastaProteinAnnotation anno = new FastaProteinAnnotation();
                anno.setName( header.getName() );
                anno.setDescription( header.getDescription() );

                // taxonomy id is optional; only set when resolvable
                Integer taxId = GetTaxonomyId.getInstance().getTaxonomyId( header.getName(), header.getDescription() );
                if( taxId != null )
                    anno.setTaxonomId( taxId );

                // computeIfAbsent replaces the containsKey/put pair of the original
                proteinAnnotations.computeIfAbsent( entry.getSequence(), k -> new HashSet<FastaProteinAnnotation>() ).add( anno );
            }
        }
    } finally {
        if( fastaReader != null ) {
            fastaReader.close();
            fastaReader = null;
        }
    }

    return proteinAnnotations;
}
/**
 * Validate an uploaded FASTA file in two passes: a first read that counts entries and
 * detects whether all header names have unique hash codes, then a validation read that
 * checks sequences, detects conflicting duplicate headers (via temp DB tables), and
 * writes an intermediate file for later taxonomy-id processing. Import status is
 * updated (and failure emails sent) at each stage.
 *
 * Throws FASTAImporterDataErrorException for data errors
 *
 * @param fastaImportTrackingDTO tracking record of the upload being validated
 * @throws FASTAImporterDataErrorException for data errors
 * @throws Exception for system errors (DB, file I/O)
 */
public void validateFASTAFile( FASTAImportTrackingDTO fastaImportTrackingDTO ) throws FASTAImporterDataErrorException, Exception {

    if ( log.isInfoEnabled() ) {
        log.info( "Starting Validating request id: " + fastaImportTrackingDTO.getId() + ", uploaded file: " + fastaImportTrackingDTO.getFilename() );
    }

    currentSequenceCount = CURRENT_SEQUENCE_COUNT_NOT_SET;
    int sequenceCount = 0;

    // Map<String, Long> headerNameLineNumberMap = new HashMap<>();

    // Mark validation as started both in the DB and on the in-memory DTO.
    String newStatusValidationStarted = ImportStatusContants.STATUS_VALIDATION_STARTED;
    FASTAImportTrackingDAO.getInstance().updateStatus( newStatusValidationStarted, fastaImportTrackingDTO.getId() );
    synchronized ( this ) { // ensure written to main memory
        fastaImportTrackingDTO.setStatus(newStatusValidationStarted);
    }

    Tmp_FASTA_header_name_desc_seq_id_DAO tmp_FASTA_header_name_desc_seq_id_DAO = Tmp_FASTA_header_name_desc_seq_id_DAO.getInstance();
    Tmp_FASTA_sequence_DAO tmp_FASTA_sequence_DAO = Tmp_FASTA_sequence_DAO.getInstance();

    int tmp_FASTA_header_name_desc_seq_id_RecordsInserted = 0;
    int tmp_FASTA_sequence_RecordsInserted = 0;

    // Resolve the per-import working directory and the files read/written here.
    File fasta_Importer_Work_Directory = Get_FASTA_Importer_Work_Directory_And_SubDirs.getInstance().get_FASTA_Importer_Work_Directory();
    String dirNameForImportTrackingId = Get_FASTA_Importer_Work_Directory_And_SubDirs.getInstance().getDirForImportTrackingId( fastaImportTrackingDTO.getId() );
    File dirForImportTrackingId = new File( fasta_Importer_Work_Directory , dirNameForImportTrackingId );
    File fastaFile = new File( dirForImportTrackingId, FileNameAndDirectoryNameConstants.UPLOADED_FASTA_FILE );
    File tempFilenameForGetTaxonomyIdsProcessing = new File( dirForImportTrackingId, FileNameAndDirectoryNameConstants.DATA_TO_GET_TAXONOMY_IDS_FILE );

    Connection tmpValidationDBConnection = null;

    try {
        FASTAReader fastaReader = null;
        IntermediateFileWriter intermediateFileWriter = null;

        boolean allHeadersHaveUniqueHashCodes = true;

        ////////////////////////////////////////////////
        //  First read file to get sequence count
        try {
            fastaReader = FASTAReader.getInstance( fastaFile );

            //  Track headerNameHashCodes in first read of file
            Set<Integer> headerNameHashCodes = new HashSet<>();

            while ( true ) {
                //   fastaReader.readNext() throws exception for invalid data format
                FASTAEntry fastaEntry = null;
                try {
                    fastaEntry = fastaReader.readNext();
                    if ( fastaEntry == null ) {
                        //  At End Of File
                        break;  //  EARLY EXIT of LOOP
                    }
                    //  Check allHeadersHaveUniqueHashCodes to stop testing
                    //  for allHeadersHaveUniqueHashCodes as soon as find first duplicate
                    if ( allHeadersHaveUniqueHashCodes ) {
                        //  the headers for this entry
                        Set<FASTAHeader> headers = fastaEntry.getHeaders();
                        for ( FASTAHeader header : headers ) {
                            String headerName = header.getName();
                            if ( headerName == null ) {
                                String msg = "Header name cannot be null. HeaderLineNumber " + fastaEntry.getHeaderLineNumber() + ", filename: " + fastaImportTrackingDTO.getFilename();
                                log.error( msg );
                                throw new FASTAImporterDataErrorException( msg );
                            }
                            //  Truncate header name, if needed
                            headerName = TruncateHeaderName.truncateHeaderName( headerName );
                            int headerNameHashCode = headerName.hashCode();
                            if ( ! headerNameHashCodes.add( headerNameHashCode ) ) {
                                //  add returned false so already in the set
                                allHeadersHaveUniqueHashCodes = false;
                            }
                        }
                    }
                } catch ( FASTADataErrorException e ) {
                    throw e;
                } catch ( Exception e ) {
                    //	log.error( "Exception", e );
                    throw e;
                }
                sequenceCount++;
            }
        } finally {
            if ( fastaReader != null ) {
                try {
                    fastaReader.close();
                    fastaReader = null;
                } catch ( Exception e ) {
                    log.error( "Exception closing fasta file", e );
                }
            }
        }

        if ( log.isInfoEnabled() ) {
            if ( allHeadersHaveUniqueHashCodes ) {
                log.info("After initial read, allHeadersHaveUniqueHashCodes is true. filename: " + fastaImportTrackingDTO.getFilename() );
            } else {
                log.info("After initial read, allHeadersHaveUniqueHashCodes is false. filename: " + fastaImportTrackingDTO.getFilename() );
            }
        }

        // Publish the entry count to the DB and the in-memory DTO.
        FASTAImportTrackingDAO.getInstance().updateFastaEntryCount( sequenceCount, fastaImportTrackingDTO.getId() );
        totalSequenceCount = sequenceCount;
        synchronized ( this ) {
            //  Ensure value is written to main memory
            fastaImportTrackingDTO.setFastaEntryCount( sequenceCount );
        }

        //////////////////////////////////////////
        //  Read fasta file to perform validation
        fastaReader = null;
        try {
            //  Track headerNameHashCodes in main validation read of file
            Set<Integer> headerNameHashCodes = new HashSet<>();

            fastaReader = FASTAReader.getInstance( fastaFile );
            intermediateFileWriter = IntermediateFileWriter.getInstance( tempFilenameForGetTaxonomyIdsProcessing );

            // Lock and clear the shared temp tables used for duplicate-header checking.
            tmpValidationDBConnection = LockValidationTempTablesDAO.getInstance().lockValidationTempTablesAndReturnDBConnection();
            try {
                tmp_FASTA_header_name_desc_seq_id_DAO.truncate( tmpValidationDBConnection );
            } catch ( Exception e ) {
                log.error( "Exception tmp_FASTA_header_name_desc_seq_id_DAO.truncate( dbConnection )(", e );
            }
            try {
                tmp_FASTA_sequence_DAO.truncate( tmpValidationDBConnection );
            } catch ( Exception e ) {
                log.error( "Exception tmp_FASTA_sequence_DAO.truncate( dbConnection )", e );
            }

            while ( true ) {
                //   fastaReader.readNext() throws exception for invalid data format
                FASTAEntry fastaEntry = null;
                try {
                    fastaEntry = fastaReader.readNext();
                } catch ( FASTADataErrorException e ) {
                    throw e;
                } catch ( Exception e ) {
                    //	log.error( "Exception", e );
                    throw e;
                }
                if ( fastaEntry == null ) {
                    //  At End Of File
                    break;  //  EARLY EXIT of LOOP
                }
                currentSequenceCount++;

                //  the headers for this entry
                Set<FASTAHeader> headers = fastaEntry.getHeaders();
                String sequenceString = fastaEntry.getSequence();

                if ( sequenceString.length() == 0 ) {
                    String msg = "sequence length == zero for id: " + fastaImportTrackingDTO.getId() + ", header line number " + fastaEntry.getHeaderLineNumber();
                    log.error( msg );
                    throw new FASTAImporterDataErrorException( msg );
                }

                boolean isSequenceValid = false;
                try {
                    //  validProteinSequence(...) throws FASTAImporterDataErrorException for errors
                    isSequenceValid = FASTAValidator.validProteinSequence( sequenceString );
                } catch( FASTAImporterDataErrorException e ) {
                    String msg = e.getMessage() + " Header line number " + fastaEntry.getHeaderLineNumber() + ", sequence: " + sequenceString;
                    //	log.error( msg );
                    throw new FASTAImporterDataErrorException( msg );
                }
                if ( ! isSequenceValid ) {
                    String msg = "Invalid protein sequence" + "for header line number " + fastaEntry.getHeaderLineNumber() + ", sequence: " + sequenceString;
                    //	log.error( msg );
                    throw new FASTAImporterDataErrorException( msg );
                }

                // Lazily created: the sequence DB record and intermediate-file entry are
                // only built once the first non-duplicate header of this entry is seen.
                Tmp_FASTA_sequence_DTO tmp_FASTA_sequence_DTO = null;
                IntermediateFileEntry importFileEntry = null;

                for ( FASTAHeader header : headers ) {

                    String headerFullString = header.getLine();
                    String headerName = header.getName();
                    String headerDescription = header.getDescription();

                    //  Truncate header name, if needed
                    headerName = TruncateHeaderName.truncateHeaderName( headerName );

                    boolean duplicateHeaderName = false;

                    int headerNameHashCode = headerName.hashCode();
                    if ( ! headerNameHashCodes.add( headerNameHashCode ) ) {
                        //  add returned false so already in the set so check in database
                        List<Tmp_FASTA_header_name_desc_seq_id_DTO> tmp_FASTA_header_name_desc_seq_id_DTOList =
                                tmp_FASTA_header_name_desc_seq_id_DAO.getAllForFastaImportTrackingIdAndHeaderName( fastaImportTrackingDTO.getId(), headerName, tmpValidationDBConnection );
                        if ( ! tmp_FASTA_header_name_desc_seq_id_DTOList.isEmpty() ) {
                            duplicateHeaderName = true;
                            for ( Tmp_FASTA_header_name_desc_seq_id_DTO item : tmp_FASTA_header_name_desc_seq_id_DTOList ) {
                                //  This header name was already processed in this file.
                                //  The header name, header description, and sequence must match or it is an error.
                                boolean exactMatch = false;
                                // NOTE(review): if headerDescription is null while
                                // item.getHeaderDescription() is non-null, the equals()
                                // clause below throws NPE — verify descriptions cannot be
                                // null here, or guard with a null-safe comparison.
                                if ( ( headerDescription == null && item.getHeaderDescription() == null )
                                        || ( headerDescription.equals( item.getHeaderDescription() ) ) ) {
                                    //  descriptions match so compare sequences
                                    Tmp_FASTA_sequence_DTO tmp_FASTA_sequence_DTO_ForRetrievedHeaderName =
                                            tmp_FASTA_sequence_DAO.getForId( item.getTmpSequenceId(), tmpValidationDBConnection );
                                    if ( tmp_FASTA_sequence_DTO_ForRetrievedHeaderName == null ) {
                                        String msg = "Tmp_FASTA_sequence_DTO not found for id " + item.getTmpSequenceId();
                                        log.error( msg );
                                        throw new Exception( msg );
                                    }
                                    String sequenceForRetrievedHeaderName = tmp_FASTA_sequence_DTO_ForRetrievedHeaderName.getSequence();
                                    if ( sequenceString.equals( sequenceForRetrievedHeaderName ) ) {
                                        exactMatch = true;
                                    }
                                }
                                if ( ! exactMatch ) {
                                    String msg = "Header name '" + headerName + "' in the file more than once at line numbers "
                                            + item.getHeaderLineNumber() + " and " + fastaEntry.getHeaderLineNumber() + ".";
                                    //	log.error( msg );
                                    throw new FASTAImporterDataErrorException( msg );
                                }
                            }
                            //  Matches found and all are exact matches so no exception thrown
                        }
                    }

                    if ( ! duplicateHeaderName ) {
                        //  Not a duplicate header name so process this header.
                        if ( tmp_FASTA_sequence_DTO == null ) {
                            //  Sequence not saved yet so save it for duplicate checking
                            tmp_FASTA_sequence_DTO = new Tmp_FASTA_sequence_DTO();
                            tmp_FASTA_sequence_DTO.setFastaImportTrackingId( fastaImportTrackingDTO.getId() );
                            tmp_FASTA_sequence_DTO.setHeaderLineNumber( fastaEntry.getHeaderLineNumber() );
                            tmp_FASTA_sequence_DTO.setSequence( sequenceString );
                            tmp_FASTA_sequence_DAO.save( tmp_FASTA_sequence_DTO, tmpValidationDBConnection );
                            tmp_FASTA_sequence_RecordsInserted++;
                        }

                        //  Save this header for duplicate checking
                        Tmp_FASTA_header_name_desc_seq_id_DTO tmp_FASTA_header_name_desc_seq_id_DTO = new Tmp_FASTA_header_name_desc_seq_id_DTO();
                        tmp_FASTA_header_name_desc_seq_id_DTO.setFastaImportTrackingId( fastaImportTrackingDTO.getId() );
                        tmp_FASTA_header_name_desc_seq_id_DTO.setHeaderLineNumber(fastaEntry.getHeaderLineNumber());
                        tmp_FASTA_header_name_desc_seq_id_DTO.setTmpSequenceId( tmp_FASTA_sequence_DTO.getId() );
                        tmp_FASTA_header_name_desc_seq_id_DTO.setHeaderName( headerName );
                        tmp_FASTA_header_name_desc_seq_id_DTO.setHeaderDescription( headerDescription );
                        tmp_FASTA_header_name_desc_seq_id_DAO.save( tmp_FASTA_header_name_desc_seq_id_DTO, tmpValidationDBConnection );
                        tmp_FASTA_header_name_desc_seq_id_RecordsInserted++;

                        ///  Add header to output processing
                        if ( importFileEntry == null ) {
                            //  First non-duplicate header so create the overall IntermediateFileEntry
                            importFileEntry = new IntermediateFileEntry();
                            importFileEntry.setHeaderLineNumber( fastaEntry.getHeaderLineNumber() );
                            importFileEntry.setSequence( sequenceString );
                            List<IntermediateFileHeaderEntry> importFileHeaderEntryList = new ArrayList<>();
                            importFileEntry.setImportFileHeaderEntryList( importFileHeaderEntryList );
                        }
                        List<IntermediateFileHeaderEntry> importFileHeaderEntryList = importFileEntry.getImportFileHeaderEntryList();
                        IntermediateFileHeaderEntry importFileHeaderEntry = new IntermediateFileHeaderEntry();
                        importFileHeaderEntry.setHeaderFullString( headerFullString );
                        importFileHeaderEntry.setHeaderName( headerName );
                        importFileHeaderEntry.setHeaderDescription( headerDescription );
                        //	importFileHeaderEntry.setTaxonomyId( taxonomyId );
                        importFileHeaderEntryList.add(importFileHeaderEntry);
                    }
                }
                if ( importFileEntry != null ) {
                    // Only written when the entry contributed at least one new header.
                    intermediateFileWriter.insertToFile( importFileEntry );
                }
            }
        } finally {
            if ( fastaReader != null ) {
                try {
                    fastaReader.close();
                } catch ( Exception e ) {
                    log.error( "Exception closing fasta file", e );
                }
            }
            if ( intermediateFileWriter != null ) {
                try {
                    intermediateFileWriter.close();
                } catch ( Exception e ) {
                    // Failure to flush/close the intermediate file is a system error:
                    // record it, notify, and rethrow.
                    String newStatus = ImportStatusContants.STATUS_SYSTEM_ERROR_PROCESSING_FAILED;
                    fastaImportTrackingDTO.setStatus( newStatus );
                    FASTAImportTrackingDAO.getInstance().updateStatus( newStatus, fastaImportTrackingDTO.getId() );
                    log.error( "Exception closing importFileWriter file", e );
                    if ( StringUtils.isNotEmpty( fastaImportTrackingDTO.getEmail() ) ) {
                        SendEmailSystemError.getInstance().sendEmailSystemError( fastaImportTrackingDTO );
                    }
                    throw e;
                }
            }
        }

        // Validation passed: queue for the taxonomy-id lookup stage.
        String newStatus = ImportStatusContants.STATUS_QUEUED_FOR_FIND_TAX_IDS;
        FASTAImportTrackingDAO.getInstance().updateStatus( newStatus, fastaImportTrackingDTO.getId() );
        fastaImportTrackingDTO.setStatus( newStatus );

    } catch ( FASTADataErrorException e ) {
        // Data error raised by the FASTA parser itself: record, mark failed, notify,
        // and rethrow wrapped as the importer's data-error type.
        GeneralImportErrorDTO generalImportErrorDTO = new GeneralImportErrorDTO();
        generalImportErrorDTO.setFastaImportTrackingId( fastaImportTrackingDTO.getId() );
        generalImportErrorDTO.setMessage( e.getMessage() );
        GeneralImportErrorDAO.getInstance().save(generalImportErrorDTO);
        String newStatus = ImportStatusContants.STATUS_VALIDATION_FAILED;
        FASTAImportTrackingDAO.getInstance().updateStatus( newStatus, fastaImportTrackingDTO.getId() );
        fastaImportTrackingDTO.setStatus( newStatus );
        if ( StringUtils.isNotEmpty( fastaImportTrackingDTO.getEmail() ) ) {
            SendEmailFailedProcessing.getInstance().sendEmailFailedProcessing( fastaImportTrackingDTO, generalImportErrorDTO );
        }
        log.error( "Data Exception", e );
        throw new FASTAImporterDataErrorException( e.getMessage() );

    } catch ( FASTAImporterDataErrorException e ) {
        // Data error raised by this validator: record, mark failed, notify, rethrow as-is.
        GeneralImportErrorDTO generalImportErrorDTO = new GeneralImportErrorDTO();
        generalImportErrorDTO.setFastaImportTrackingId( fastaImportTrackingDTO.getId() );
        generalImportErrorDTO.setMessage( e.getMessage() );
        GeneralImportErrorDAO.getInstance().save(generalImportErrorDTO);
        String newStatus = ImportStatusContants.STATUS_VALIDATION_FAILED;
        FASTAImportTrackingDAO.getInstance().updateStatus( newStatus, fastaImportTrackingDTO.getId() );
        fastaImportTrackingDTO.setStatus( newStatus );
        if ( StringUtils.isNotEmpty( fastaImportTrackingDTO.getEmail() ) ) {
            SendEmailFailedProcessing.getInstance().sendEmailFailedProcessing( fastaImportTrackingDTO, generalImportErrorDTO );
        }
        throw e;

    } catch ( Exception e ) {
        // Anything else is a system error: record a generic message, mark failed, notify.
        GeneralImportErrorDTO generalImportErrorDTO = new GeneralImportErrorDTO();
        generalImportErrorDTO.setFastaImportTrackingId( fastaImportTrackingDTO.getId() );
        generalImportErrorDTO.setMessage( GeneralImportErrorConstants.GENERAL_IMPORT_ERROR_MESSAGE_SYSTEM_ERROR );
        GeneralImportErrorDAO.getInstance().save(generalImportErrorDTO);
        String newStatus = ImportStatusContants.STATUS_SYSTEM_ERROR_PROCESSING_FAILED;
        FASTAImportTrackingDAO.getInstance().updateStatus( newStatus, fastaImportTrackingDTO.getId() );
        fastaImportTrackingDTO.setStatus( newStatus );
        log.error( "Exception", e );
        if ( StringUtils.isNotEmpty( fastaImportTrackingDTO.getEmail() ) ) {
            SendEmailSystemError.getInstance().sendEmailSystemError( fastaImportTrackingDTO );
        }
        throw e;

    } finally {
        // Always clear the temp tables and release the lock/connection.
        if ( tmpValidationDBConnection != null ) {
            try {
                tmp_FASTA_header_name_desc_seq_id_DAO.truncate( tmpValidationDBConnection );
            } catch ( Throwable e ) {
                log.error( "Exception tmp_FASTA_header_name_desc_seq_id_DAO.truncate( dbConnection )(", e );
            }
            try {
                tmp_FASTA_sequence_DAO.truncate( tmpValidationDBConnection );
            } catch ( Throwable e ) {
                log.error( "Exception tmp_FASTA_sequence_DAO.truncate( dbConnection )", e );
            }
            try {
                LockValidationTempTablesDAO.getInstance().unlockAllTableAndCloseConnection( tmpValidationDBConnection );
            } catch ( Exception e ) {
                log.error( "Exception unlocking temp tables or closing connection", e );
            }
        }
    }

    if ( log.isInfoEnabled() ) {
        log.info( "Finished Validating request id: " + fastaImportTrackingDTO.getId()
                + ", uploaded file: " + fastaImportTrackingDTO.getFilename()
                + ", tmp_FASTA_header_name_desc_seq_id_RecordsInserted: " + tmp_FASTA_header_name_desc_seq_id_RecordsInserted
                + ", tmp_FASTA_sequence_RecordsInserted: " + tmp_FASTA_sequence_RecordsInserted );
    }
}
Usage snippet has been bookmarked! Review your bookmarks
Thank you! Review your likes