MzMLPeaksDecoder.java
/*
* (C) Copyright 2015-2016 by MSDK Development Team
*
* This software is dual-licensed under either
*
* (a) the terms of the GNU Lesser General Public License version 2.1 as published by the Free
* Software Foundation
*
* or (per the licensee's choosing)
*
* (b) the terms of the Eclipse Public License v1.0 as published by the Eclipse Foundation.
*/
package io.github.msdk.io.mzml.data;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Base64;
import java.util.zip.DataFormatException;
import java.util.zip.InflaterInputStream;
import org.apache.commons.io.IOUtils;
import com.google.common.io.LittleEndianDataInputStream;
import io.github.msdk.MSDKException;
import io.github.msdk.io.mzml.util.ByteBufferInputStream;
import io.github.msdk.io.mzml.util.MSNumpress;
/**
* <p>
* MzMLIntensityPeaksDecoder class.
* </p>
*/
public class MzMLPeaksDecoder {
/**
* Converts a base64 encoded mz or intensity string used in mzML files to an array of floats. If
* the original precision was 64 bit, you still get floats as output.
*
* @param binaryDataInfo meta-info about the compressed data
* @throws java.util.zip.DataFormatException if any.
* @throws java.io.IOException if any.
* @return a float array containing the decoded values
* @throws io.github.msdk.MSDKException if any.
* @param inputStream a {@link java.io.InputStream} object.
* @param data an array of float.
*/
public static float[] decodeToFloat(InputStream inputStream, MzMLBinaryDataInfo binaryDataInfo,
float[] data) throws DataFormatException, IOException, MSDKException {
int lengthIn = binaryDataInfo.getEncodedLength();
int numPoints = binaryDataInfo.getArrayLength();
InputStream is = null;
if (inputStream instanceof ByteBufferInputStream) {
ByteBufferInputStream mappedByteBufferInputStream = (ByteBufferInputStream) inputStream;
mappedByteBufferInputStream.constrain(binaryDataInfo.getPosition(), lengthIn);
is = Base64.getDecoder().wrap(mappedByteBufferInputStream);
} else {
is = Base64.getDecoder().wrap(inputStream);
}
// for some reason there sometimes might be zero length <peaks> tags
// (ms2 usually)
// in this case we just return an empty result
if (lengthIn == 0) {
return new float[0];
}
InflaterInputStream iis = null;
LittleEndianDataInputStream dis = null;
byte[] bytes = null;
if (data == null || data.length < numPoints)
data = new float[numPoints];
// first check for zlib compression, inflation must be done before
// NumPress
if (binaryDataInfo.getCompressionType() != null) {
switch (binaryDataInfo.getCompressionType()) {
case ZLIB:
case NUMPRESS_LINPRED_ZLIB:
case NUMPRESS_POSINT_ZLIB:
case NUMPRESS_SHLOGF_ZLIB:
iis = new InflaterInputStream(is);
dis = new LittleEndianDataInputStream(iis);
break;
default:
dis = new LittleEndianDataInputStream(is);
break;
}
// Now we can check for NumPress
int numDecodedDoubles;
switch (binaryDataInfo.getCompressionType()) {
case NUMPRESS_LINPRED:
case NUMPRESS_LINPRED_ZLIB:
bytes = IOUtils.toByteArray(dis);
numDecodedDoubles = MSNumpress.decodeLinear(bytes, bytes.length, data);
if (numDecodedDoubles < 0) {
throw new MSDKException("MSNumpress linear decoder failed");
}
return data;
case NUMPRESS_POSINT:
case NUMPRESS_POSINT_ZLIB:
bytes = IOUtils.toByteArray(dis);
numDecodedDoubles = MSNumpress.decodePic(bytes, bytes.length, data);
if (numDecodedDoubles < 0) {
throw new MSDKException("MSNumpress positive integer decoder failed");
}
return data;
case NUMPRESS_SHLOGF:
case NUMPRESS_SHLOGF_ZLIB:
bytes = IOUtils.toByteArray(dis);
numDecodedDoubles = MSNumpress.decodeSlof(bytes, bytes.length, data);
if (numDecodedDoubles < 0) {
throw new MSDKException("MSNumpress short logged float decoder failed");
}
return data;
default:
break;
}
} else {
dis = new LittleEndianDataInputStream(is);
}
Integer precision;
switch (binaryDataInfo.getBitLength()) {
case THIRTY_TWO_BIT_FLOAT:
case THIRTY_TWO_BIT_INTEGER:
precision = 32;
break;
case SIXTY_FOUR_BIT_FLOAT:
case SIXTY_FOUR_BIT_INTEGER:
precision = 64;
break;
default:
dis.close();
throw new IllegalArgumentException(
"Precision MUST be specified and be either 32-bit or 64-bit, "
+ "if MS-NUMPRESS compression was not used");
}
try {
switch (precision) {
case (32): {
for (int i = 0; i < numPoints; i++) {
data[i] = dis.readFloat();
}
break;
}
case (64): {
for (int i = 0; i < numPoints; i++) {
data[i] = (float) dis.readDouble();
}
break;
}
default: {
dis.close();
throw new IllegalArgumentException(
"Precision can only be 32/64 bits, other values are not valid.");
}
}
} catch (EOFException eof) {
// If the stream reaches EOF unexpectedly, it is probably because the particular
// scan/chromatogram didn't pass the Predicate
throw new MSDKException(
"Couldn't obtain values. Please make sure the scan/chromatogram passes the Predicate.");
} finally {
dis.close();
}
return data;
}
/**
* Converts a base64 encoded mz or intensity string used in mzML files to an array of doubles. If
* the original precision was 32 bit, you still get doubles as output.
*
* @param binaryDataInfo meta-info about encoded data
* @throws java.util.zip.DataFormatException if any.
* @throws java.io.IOException if any.
* @return a double array containing the decoded values
* @throws io.github.msdk.MSDKException if any.
* @param inputStream a {@link java.io.InputStream} object.
* @param data an array of double.
*/
public static double[] decodeToDouble(InputStream inputStream, MzMLBinaryDataInfo binaryDataInfo,
double[] data) throws DataFormatException, IOException, MSDKException {
int lengthIn = binaryDataInfo.getEncodedLength();
int numPoints = binaryDataInfo.getArrayLength();
InputStream is = null;
if (inputStream instanceof ByteBufferInputStream) {
ByteBufferInputStream mappedByteBufferInputStream = (ByteBufferInputStream) inputStream;
mappedByteBufferInputStream.constrain(binaryDataInfo.getPosition(), lengthIn);
is = Base64.getDecoder().wrap(mappedByteBufferInputStream);
} else {
is = Base64.getDecoder().wrap(inputStream);
}
// for some reason there sometimes might be zero length <peaks> tags
// (ms2 usually)
// in this case we just return an empty result
if (lengthIn == 0) {
return new double[0];
}
InflaterInputStream iis = null;
LittleEndianDataInputStream dis = null;
byte[] bytes = null;
if (data == null || data.length < numPoints)
data = new double[numPoints];
// first check for zlib compression, inflation must be done before
// NumPress
if (binaryDataInfo.getCompressionType() != null) {
switch (binaryDataInfo.getCompressionType()) {
case ZLIB:
case NUMPRESS_LINPRED_ZLIB:
case NUMPRESS_POSINT_ZLIB:
case NUMPRESS_SHLOGF_ZLIB:
iis = new InflaterInputStream(is);
dis = new LittleEndianDataInputStream(iis);
break;
default:
dis = new LittleEndianDataInputStream(is);
break;
}
// Now we can check for NumPress
int numDecodedDoubles;
switch (binaryDataInfo.getCompressionType()) {
case NUMPRESS_LINPRED:
case NUMPRESS_LINPRED_ZLIB:
bytes = IOUtils.toByteArray(dis);
numDecodedDoubles = MSNumpress.decodeLinear(bytes, bytes.length, data);
if (numDecodedDoubles < 0) {
throw new MSDKException("MSNumpress linear decoder failed");
}
return data;
case NUMPRESS_POSINT:
case NUMPRESS_POSINT_ZLIB:
bytes = IOUtils.toByteArray(dis);
numDecodedDoubles = MSNumpress.decodePic(bytes, bytes.length, data);
if (numDecodedDoubles < 0) {
throw new MSDKException("MSNumpress positive integer decoder failed");
}
return data;
case NUMPRESS_SHLOGF:
case NUMPRESS_SHLOGF_ZLIB:
bytes = IOUtils.toByteArray(dis);
numDecodedDoubles = MSNumpress.decodeSlof(bytes, bytes.length, data);
if (numDecodedDoubles < 0) {
throw new MSDKException("MSNumpress short logged float decoder failed");
}
return data;
default:
break;
}
} else {
dis = new LittleEndianDataInputStream(is);
}
Integer precision;
switch (binaryDataInfo.getBitLength()) {
case THIRTY_TWO_BIT_FLOAT:
case THIRTY_TWO_BIT_INTEGER:
precision = 32;
break;
case SIXTY_FOUR_BIT_FLOAT:
case SIXTY_FOUR_BIT_INTEGER:
precision = 64;
break;
default:
dis.close();
throw new IllegalArgumentException(
"Precision MUST be specified and be either 32-bit or 64-bit, "
+ "if MS-NUMPRESS compression was not used");
}
try {
switch (precision) {
case (32): {
int asInt;
for (int i = 0; i < numPoints; i++) {
asInt = dis.readInt();
data[i] = Float.intBitsToFloat(asInt);
}
break;
}
case (64): {
long asLong;
for (int i = 0; i < numPoints; i++) {
asLong = dis.readLong();
data[i] = Double.longBitsToDouble(asLong);
}
break;
}
}
} catch (EOFException eof) {
// If the stream reaches EOF unexpectedly, it is probably because the particular
// scan/chromatogram didn't pass the Predicate
throw new MSDKException(
"Couldn't obtain values. Please make sure the scan/chromatogram passes the Predicate.");
} finally {
dis.close();
}
return data;
}
}