// MzMLFileImportMethod.java
/*
* (C) Copyright 2015-2016 by MSDK Development Team
*
* This software is dual-licensed under either
*
* (a) the terms of the GNU Lesser General Public License version 2.1 as published by the Free
* Software Foundation
*
* or (per the licensee's choosing)
*
* (b) the terms of the Eclipse Public License v1.0 as published by the Eclipse Foundation.
*/
package io.github.msdk.io.mzml;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.function.Predicate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import io.github.msdk.MSDKException;
import io.github.msdk.MSDKMethod;
import io.github.msdk.datamodel.chromatograms.Chromatogram;
import io.github.msdk.datamodel.rawdata.MsScan;
import io.github.msdk.datamodel.rawdata.RawDataFile;
import io.github.msdk.io.mzml.data.MzMLParser;
import io.github.msdk.io.mzml.data.MzMLRawDataFile;
import io.github.msdk.io.mzml.util.ByteBufferInputStream;
import io.github.msdk.io.mzml.util.FileMemoryMapper;
import javolution.text.CharArray;
import javolution.xml.internal.stream.XMLStreamReaderImpl;
import javolution.xml.stream.XMLStreamConstants;
import javolution.xml.stream.XMLStreamException;
/**
 * <p>
 * Imports data in MzML format from a {@link java.io.File File}, a {@link java.nio.file.Path Path},
 * a path string or an {@link java.io.InputStream InputStream}.
 * </p>
 * <p>
 * {@link io.github.msdk.datamodel.rawdata.MsScan Scan}s and
 * {@link io.github.msdk.datamodel.chromatograms.Chromatogram Chromatogram}s which pass the
 * corresponding {@link java.util.function.Predicate Predicate} are parsed with their values
 * pre-loaded. Other {@link io.github.msdk.datamodel.rawdata.MsScan Scan}s and
 * {@link io.github.msdk.datamodel.chromatograms.Chromatogram Chromatogram}s can be loaded on demand
 * if the source is a {@link java.io.File File}, whereas they will be dropped if the source is an
 * {@link java.io.InputStream InputStream}.
 * </p>
 * <p>
 * Default predicates: every constructor which takes a file location (string,
 * {@link java.nio.file.Path Path} or {@link java.io.File File}) and no predicates uses predicates
 * which reject everything; the constructor which takes only an {@link java.io.InputStream
 * InputStream} uses predicates which accept everything.
 * </p>
 */
public class MzMLFileImportMethod implements MSDKMethod<RawDataFile> {

  /** Source file, or null when the data is read from {@link #inputStream}. */
  private final File mzMLFile;

  /** Source stream, or null when the data is read from {@link #mzMLFile}. */
  final InputStream inputStream;

  /** Raw data file obtained from the parser; assigned at the start of {@link #execute()}. */
  private MzMLRawDataFile newRawFile;

  /** Set by {@link #cancel()}; checked before every XML event is read. */
  private volatile boolean canceled;

  /**
   * Fraction of the input consumed so far. Kept as a primitive so that the per-event update does
   * not allocate, and volatile so that a thread polling {@link #getFinishedPercentage()} sees
   * recent values.
   */
  private volatile float progress;

  /** Last progress value (in percent, multiple of 10) which was written to the log. */
  private int lastLoggedProgress;

  private final Logger logger;

  // Both predicates start as "accept everything" and are AND-ed with the caller's predicate in
  // the internal constructor; Predicate.and(null) throws NullPointerException, so a null
  // predicate is rejected at construction time.
  private Predicate<MsScan> msScanPredicate = s -> true;
  private Predicate<Chromatogram> chromatogramPredicate = c -> true;

  /**
   * <p>
   * Constructor for MzMLFileImportMethod. Uses the same default predicates (reject everything) as
   * {@link #MzMLFileImportMethod(File)} and {@link #MzMLFileImportMethod(Path)}, so that the same
   * file is imported identically no matter how its location is specified.
   * </p>
   *
   * @param mzMLFilePath a {@link java.lang.String String} which contains the absolute path to the
   *        MzML File.
   */
  public MzMLFileImportMethod(String mzMLFilePath) {
    this(new File(mzMLFilePath), s -> false, c -> false);
  }

  /**
   * <p>
   * Constructor for MzMLFileImportMethod.
   * </p>
   *
   * @param mzMLFilePath a {@link java.lang.String String} which contains the absolute path to the
   *        MzML File.
   * @param msScanPredicate Only {@link io.github.msdk.datamodel.rawdata.MsScan MsScan}s which pass
   *        this predicate will be parsed by the parser and added to the
   *        {@link io.github.msdk.io.mzml.data.MzMLRawDataFile RawDataFile} returned by the
   *        {@link #getResult() getResult()} method. Must not be null.
   * @param chromatogramPredicate Only {@link io.github.msdk.datamodel.chromatograms.Chromatogram
   *        Chromatogram}s which pass this predicate will be parsed by the parser and added to the
   *        {@link io.github.msdk.io.mzml.data.MzMLRawDataFile RawDataFile} returned by the
   *        {@link #getResult() getResult()} method. Must not be null.
   */
  public MzMLFileImportMethod(String mzMLFilePath, Predicate<MsScan> msScanPredicate,
      Predicate<Chromatogram> chromatogramPredicate) {
    this(new File(mzMLFilePath), msScanPredicate, chromatogramPredicate);
  }

  /**
   * <p>
   * Constructor for MzMLFileImportMethod. Uses predicates which reject everything.
   * </p>
   *
   * @param mzMLFilePath a {@link java.nio.file.Path Path} object which contains the path to the
   *        MzML File.
   */
  public MzMLFileImportMethod(Path mzMLFilePath) {
    this(mzMLFilePath.toFile(), s -> false, c -> false);
  }

  /**
   * <p>
   * Constructor for MzMLFileImportMethod.
   * </p>
   *
   * @param mzMLFilePath a {@link java.nio.file.Path Path} object which contains the path to the
   *        MzML File.
   * @param msScanPredicate Only {@link io.github.msdk.datamodel.rawdata.MsScan MsScan}s which pass
   *        this predicate will be parsed by the parser and added to the
   *        {@link io.github.msdk.io.mzml.data.MzMLRawDataFile RawDataFile} returned by the
   *        {@link #getResult() getResult()} method. Must not be null.
   * @param chromatogramPredicate Only {@link io.github.msdk.datamodel.chromatograms.Chromatogram
   *        Chromatogram}s which pass this predicate will be parsed by the parser and added to the
   *        {@link io.github.msdk.io.mzml.data.MzMLRawDataFile RawDataFile} returned by the
   *        {@link #getResult() getResult()} method. Must not be null.
   */
  public MzMLFileImportMethod(Path mzMLFilePath, Predicate<MsScan> msScanPredicate,
      Predicate<Chromatogram> chromatogramPredicate) {
    this(mzMLFilePath.toFile(), msScanPredicate, chromatogramPredicate);
  }

  /**
   * <p>
   * Constructor for MzMLFileImportMethod. Uses predicates which reject everything.
   * </p>
   *
   * @param mzMLFile a {@link java.io.File File} object instance of the MzML File.
   */
  public MzMLFileImportMethod(File mzMLFile) {
    this(mzMLFile, null, s -> false, c -> false);
  }

  /**
   * <p>
   * Constructor for MzMLFileImportMethod.
   * </p>
   *
   * @param mzMLFile a {@link java.io.File File} object instance of the MzML File.
   * @param msScanPredicate Only {@link io.github.msdk.datamodel.rawdata.MsScan MsScan}s which pass
   *        this predicate will be parsed by the parser and added to the
   *        {@link io.github.msdk.io.mzml.data.MzMLRawDataFile RawDataFile} returned by the
   *        {@link #getResult() getResult()} method. Must not be null.
   * @param chromatogramPredicate Only {@link io.github.msdk.datamodel.chromatograms.Chromatogram
   *        Chromatogram}s which pass this predicate will be parsed by the parser and added to the
   *        {@link io.github.msdk.io.mzml.data.MzMLRawDataFile RawDataFile} returned by the
   *        {@link #getResult() getResult()} method. Must not be null.
   */
  public MzMLFileImportMethod(File mzMLFile, Predicate<MsScan> msScanPredicate,
      Predicate<Chromatogram> chromatogramPredicate) {
    this(mzMLFile, null, msScanPredicate, chromatogramPredicate);
  }

  /**
   * <p>
   * Constructor for MzMLFileImportMethod. Uses predicates which accept everything.
   * </p>
   *
   * @param inputStream an {@link java.io.InputStream InputStream} which contains data in MzML
   *        format.
   */
  public MzMLFileImportMethod(InputStream inputStream) {
    this(null, inputStream, s -> true, c -> true);
  }

  /**
   * <p>
   * Constructor for MzMLFileImportMethod.
   * </p>
   *
   * @param inputStream an {@link java.io.InputStream InputStream} which contains data in MzML
   *        format.
   * @param msScanPredicate Only {@link io.github.msdk.datamodel.rawdata.MsScan MsScan}s which pass
   *        this predicate will be parsed by the parser and added to the
   *        {@link io.github.msdk.io.mzml.data.MzMLRawDataFile RawDataFile} returned by the
   *        {@link #getResult() getResult()} method. Must not be null.
   * @param chromatogramPredicate Only {@link io.github.msdk.datamodel.chromatograms.Chromatogram
   *        Chromatogram}s which pass this predicate will be parsed by the parser and added to the
   *        {@link io.github.msdk.io.mzml.data.MzMLRawDataFile RawDataFile} returned by the
   *        {@link #getResult() getResult()} method. Must not be null.
   */
  public MzMLFileImportMethod(InputStream inputStream, Predicate<MsScan> msScanPredicate,
      Predicate<Chromatogram> chromatogramPredicate) {
    this(null, inputStream, msScanPredicate, chromatogramPredicate);
  }

  /**
   * <p>
   * Internal constructor used to initialize instances of this object using other constructors.
   * </p>
   *
   * @param mzMLFile source file, or null when reading from a stream.
   * @param inputStream source stream, or null when reading from a file.
   * @param msScanPredicate predicate for scans; a null value causes a
   *        {@link java.lang.NullPointerException NullPointerException}.
   * @param chromatogramPredicate predicate for chromatograms; a null value causes a
   *        {@link java.lang.NullPointerException NullPointerException}.
   */
  private MzMLFileImportMethod(File mzMLFile, InputStream inputStream,
      Predicate<MsScan> msScanPredicate, Predicate<Chromatogram> chromatogramPredicate) {
    this.mzMLFile = mzMLFile;
    this.inputStream = inputStream;
    this.canceled = false;
    this.progress = 0f;
    this.lastLoggedProgress = 0;
    this.logger = LoggerFactory.getLogger(this.getClass());
    this.msScanPredicate = this.msScanPredicate.and(msScanPredicate);
    this.chromatogramPredicate = this.chromatogramPredicate.and(chromatogramPredicate);
  }

  /**
   * {@inheritDoc}
   *
   * <p>
   * Parse the MzML data and return the parsed data. The XML is read event by event; opening tags,
   * closing tags and character data are forwarded to the {@link MzMLParser}. Progress is tracked
   * only when the source is a file.
   * </p>
   *
   * @return the parsed raw data file, or null if {@link #cancel()} was called before parsing
   *         finished.
   * @throws MSDKException if neither a file nor a stream was supplied, or if reading or XML
   *         parsing fails.
   */
  @Override
  public MzMLRawDataFile execute() throws MSDKException {

    try {

      final InputStream is;

      if (mzMLFile != null) {
        logger.info("Began parsing file: {}", mzMLFile.getAbsolutePath());
        is = FileMemoryMapper.mapToMemory(mzMLFile);
      } else if (inputStream != null) {
        logger.info("Began parsing file from stream");
        is = inputStream;
      } else {
        throw new MSDKException("Invalid input");
      }
      // NOTE: `is` is intentionally not closed here; it is handed to the parser, and per the
      // class description file-backed data may still be loaded on demand after parsing.

      // It's ok to directly create this particular reader, this class is `public final`
      // and we precisely want that fast UTF-8 reader implementation
      final XMLStreamReaderImpl xmlStreamReader = new XMLStreamReaderImpl();
      xmlStreamReader.setInput(is, "UTF-8");

      final MzMLParser parser = new MzMLParser(this);
      this.newRawFile = parser.getMzMLRawFile();

      // Start progress tracking from scratch, also when execute() is called more than once
      progress = 0f;
      lastLoggedProgress = 0;

      try {
        int eventType;
        do {
          // Stop as soon as cancellation was requested
          if (canceled) {
            return null;
          }

          eventType = xmlStreamReader.next();

          // Progress can only be tracked if the source is a file, because a plain InputStream
          // does not expose its total length
          if (mzMLFile != null) {
            final float fraction = (float) (xmlStreamReader.getLocation().getCharacterOffset())
                / ((ByteBufferInputStream) is).length();
            progress = fraction;

            // Log progress after every 10% completion
            if ((int) (fraction * 100) >= lastLoggedProgress + 10) {
              lastLoggedProgress = (int) (fraction * 10) * 10;
              logger.debug("Parsing in progress... {}% completed", lastLoggedProgress);
            }
          }

          switch (eventType) {
            case XMLStreamConstants.START_ELEMENT:
              final CharArray openingTagName = xmlStreamReader.getLocalName();
              parser.processOpeningTag(xmlStreamReader, is, openingTagName);
              break;

            case XMLStreamConstants.END_ELEMENT:
              final CharArray closingTagName = xmlStreamReader.getLocalName();
              parser.processClosingTag(xmlStreamReader, closingTagName);
              break;

            case XMLStreamConstants.CHARACTERS:
              parser.processCharacters(xmlStreamReader);
              break;

            default:
              // Other event types are not relevant for the parser
              break;
          }

        } while (eventType != XMLStreamConstants.END_DOCUMENT);
      } finally {
        xmlStreamReader.close();
      }

      progress = 1f;
      logger.info("Parsing Complete");

    } catch (IOException | XMLStreamException e) {
      throw (new MSDKException(e));
    }

    return newRawFile;
  }

  /** {@inheritDoc} */
  @Override
  public Float getFinishedPercentage() {
    return progress;
  }

  /** {@inheritDoc} */
  @Override
  public RawDataFile getResult() {
    return newRawFile;
  }

  /** {@inheritDoc} */
  @Override
  public void cancel() {
    this.canceled = true;
  }

  /**
   * <p>Getter for the field <code>msScanPredicate</code>.</p>
   *
   * @return {@link java.util.function.Predicate Predicate} in effect for
   *         {@link io.github.msdk.datamodel.rawdata.MsScan MsScan}s <br>
   *         If no predicate was specified on initialization, it always evaluates to false when
   *         the source was given as a file location, and always evaluates to true when the source
   *         is an {@link java.io.InputStream InputStream}
   */
  public Predicate<MsScan> getMsScanPredicate() {
    return msScanPredicate;
  }

  /**
   * <p>Getter for the field <code>chromatogramPredicate</code>.</p>
   *
   * @return {@link java.util.function.Predicate Predicate} in effect for
   *         {@link io.github.msdk.datamodel.chromatograms.Chromatogram Chromatogram}s <br>
   *         If no predicate was specified on initialization, it always evaluates to false when
   *         the source was given as a file location, and always evaluates to true when the source
   *         is an {@link java.io.InputStream InputStream}
   */
  public Predicate<Chromatogram> getChromatogramPredicate() {
    return chromatogramPredicate;
  }

  /**
   * <p>Getter for the field <code>mzMLFile</code>.</p>
   *
   * @return a {@link java.io.File File} instance of the MzML source if being read from a file <br>
   *         null if the MzML source is an {@link java.io.InputStream InputStream}
   */
  public File getMzMLFile() {
    return mzMLFile;
  }
}