diff --git a/README.md b/README.md
index 448ef6b43..60d91e107 100644
--- a/README.md
+++ b/README.md
@@ -233,15 +233,14 @@ of the dependencies.
#### Getting all Cobrix dependencies
-Cobrix's `spark-cobol` data source depends on the COBOL parser that is a part of Cobrix itself and on `scodec` libraries
-to decode various binary formats.
+Cobrix's `spark-cobol` data source depends on the COBOL parser that is a part of Cobrix itself.
The jars that you need to get are:
-* spark-cobol_2.12-2.7.10.jar
-* cobol-parser_2.12-2.7.10.jar
-* scodec-core_2.12-1.10.3.jar
-* scodec-bits_2.12-1.1.4.jar
+* spark-cobol_2.12-2.8.0.jar
+* cobol-parser_2.12-2.8.0.jar
+
+> Versions older than 2.8.0 also need `scodec-core_2.12-1.10.3.jar` and `scodec-bits_2.12-1.1.4.jar`.
> Versions older than 2.7.1 also need `antlr4-runtime-4.8.jar`.
@@ -249,7 +248,7 @@ After that you can specify these jars in `spark-shell` command line. Here is an
```
$ spark-shell --packages za.co.absa.cobrix:spark-cobol_2.12:2.7.10
or
-$ spark-shell --master yarn --deploy-mode client --driver-cores 4 --driver-memory 4G --jars spark-cobol_2.12-2.7.10.jar,cobol-parser_2.12-2.7.10.jar,scodec-core_2.12-1.10.3.jar,scodec-bits_2.12-1.1.4.jar
+$ spark-shell --master yarn --deploy-mode client --driver-cores 4 --driver-memory 4G --jars spark-cobol_2.12-2.8.0.jar,cobol-parser_2.12-2.8.0.jar
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
diff --git a/build.sbt b/build.sbt
index 04ee8097c..4dd7493cc 100644
--- a/build.sbt
+++ b/build.sbt
@@ -151,10 +151,6 @@ lazy val assemblySettings = Seq(
assembly / assemblyShadeRules:= Seq(
// Spark may rely on a different version of ANTLR runtime. Renaming the package helps avoid the binary incompatibility
ShadeRule.rename("org.antlr.**" -> "za.co.absa.cobrix.cobol.parser.shaded.org.antlr.@1").inAll,
- // Shading all 3rd party libraries used by 'spark-cobol' in order to avoid binary conflicts.
- ShadeRule.rename("macrocompat.**" -> "za.co.absa.cobrix.spark.cobol.shaded.macrocompat.@1").inAll,
- ShadeRule.rename("scodec.**" -> "za.co.absa.cobrix.spark.cobol.shaded.scodec.@1").inAll,
- ShadeRule.rename("shapeless.**" -> "za.co.absa.cobrix.spark.cobol.shaded.shapeless.@1").inAll,
// The SLF4j API and implementation are provided by Spark
ShadeRule.zap("org.slf4j.**").inAll
),
diff --git a/cobol-parser/pom.xml b/cobol-parser/pom.xml
index 21a4460e3..c19dd66ea 100644
--- a/cobol-parser/pom.xml
+++ b/cobol-parser/pom.xml
@@ -30,11 +30,6 @@
jar
-
-
- org.scodec
- scodec-core_${scala.compat.version}
-
org.antlr
@@ -46,6 +41,12 @@
org.slf4j
slf4j-api
+
+
+ org.scodec
+ scodec-core_${scala.compat.version}
+ test
+
org.slf4j
slf4j-simple
diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryUtils.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryUtils.scala
index 21efb7c5d..0567fc1a1 100644
--- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryUtils.scala
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryUtils.scala
@@ -16,23 +16,13 @@
package za.co.absa.cobrix.cobol.parser.decoders
-import scodec.Codec
-import scodec.bits.BitVector
import za.co.absa.cobrix.cobol.parser.ast.datatype._
import za.co.absa.cobrix.cobol.parser.common.Constants
import za.co.absa.cobrix.cobol.parser.encoding.{EBCDIC, Encoding}
-import scala.util.control.NonFatal
-
/** Utilites for decoding Cobol binary data files **/
//noinspection RedundantBlock
object BinaryUtils {
-
- lazy val floatB: Codec[Float] = scodec.codecs.float
- lazy val floatL: Codec[Float] = scodec.codecs.floatL
- lazy val doubleB: Codec[Double] = scodec.codecs.double
- lazy val doubleL: Codec[Double] = scodec.codecs.doubleL
-
/**
* This is the EBCDIC to ASCII conversion table. This is an "invariant" subset of EBCDIC code pages.
* For full EBCDIC code pages support please use [[za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePage]]
@@ -105,25 +95,6 @@ object BinaryUtils {
/** Convert an ASCII character to EBCDIC */
def asciiToEbcdic(char: Char): Byte = ascii2ebcdic(char.toByte)
- /** Get the bit count of a cobol data type
- *
- * @param codec EBCDIC / ASCII
- * @param comp A type of compact stirage
- * @param precision The precision (the number of digits) of the type
- * @return
- */
- def getBitCount(codec: Codec[_ <: AnyVal], comp: Option[Int], precision: Int): Int = {
- comp match {
- case Some(value) =>
- value match {
- case compact if compact == 3 =>
- (precision + 1) * codec.sizeBound.lowerBound.toInt //bcd
- case _ => codec.sizeBound.lowerBound.toInt // bin/float/floatL
- }
- case None => precision * codec.sizeBound.lowerBound.toInt
- }
- }
-
def getBytesCount(compression: Option[Usage], precision: Int, isSigned: Boolean, isExplicitDecimalPt: Boolean, isSignSeparate: Boolean): Int = {
import Constants._
val isRealSigned = if (isSignSeparate) false else isSigned
@@ -273,32 +244,4 @@ object BinaryUtils {
}
addDecimalPoint(value.toString, scale, scaleFactor)
}
-
- /**
- * A decoder for IEEE-754 big endian floats
- *
- * @param bytes A byte array that represents the binary data
- * @return A boxed float
- */
- def decodeFloat(bytes: Array[Byte]): java.lang.Float = {
- try {
- floatB.decode(BitVector(bytes)).require.value
- } catch {
- case NonFatal(_) => null
- }
- }
-
- /**
- * A decoder for IEEE-754 big endian doubles
- *
- * @param bytes A byte array that represents the binary data
- * @return A boxed double
- */
- def decodeDouble(bytes: Array[Byte]): java.lang.Double = {
- try {
- doubleB.decode(BitVector(bytes)).require.value
- } catch {
- case NonFatal(_) => null
- }
- }
}
diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/FloatingPointDecoders.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/FloatingPointDecoders.scala
index 0ceb54250..2972c18f5 100644
--- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/FloatingPointDecoders.scala
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/FloatingPointDecoders.scala
@@ -16,23 +16,72 @@
package za.co.absa.cobrix.cobol.parser.decoders
-import scodec.Codec
-import scodec.bits.BitVector
-
+import java.nio.{ByteBuffer, ByteOrder}
import scala.util.control.NonFatal
object FloatingPointDecoders {
- private val floatB: Codec[Float] = scodec.codecs.float
- private val floatL: Codec[Float] = scodec.codecs.floatL
- private val doubleB: Codec[Double] = scodec.codecs.double
- private val doubleL: Codec[Double] = scodec.codecs.doubleL
-
private val BIT_COUNT_MAGIC = 0x000055AFL
+ /**
+ * A decoder for IEEE-754 32 bit big endian floats
+ *
+ * @param bytes A byte array that represents the binary data
+ * @return The decoded value as a primitive (unboxed) float
+ */
+ def decodeFloatB(bytes: Array[Byte]): Float = {
+ require(bytes.length == 4, "Input must be exactly 4 bytes for a 32-bit float")
+
+ val byteBuffer = ByteBuffer.wrap(bytes)
+ byteBuffer.order(ByteOrder.BIG_ENDIAN)
+ byteBuffer.getFloat
+ }
+
+ /**
+ * A decoder for IEEE-754 32 bit little endian floats
+ *
+ * @param bytes A byte array that represents the binary data
+ * @return The decoded value as a primitive (unboxed) float
+ */
+ def decodeFloatL(bytes: Array[Byte]): Float = {
+ require(bytes.length == 4, "Input must be exactly 4 bytes for a 32-bit float")
+
+ val byteBuffer = ByteBuffer.wrap(bytes)
+ byteBuffer.order(ByteOrder.LITTLE_ENDIAN)
+ byteBuffer.getFloat
+ }
+
+ /**
+ * A decoder for IEEE-754 64 bit big endian floats
+ *
+ * @param bytes A byte array that represents the binary data
+ * @return The decoded value as a primitive (unboxed) double
+ */
+ def decodeDoubleB(bytes: Array[Byte]): Double = {
+ require(bytes.length == 8, "Input must be exactly 8 bytes for a 64-bit float")
+
+ val byteBuffer = ByteBuffer.wrap(bytes)
+ byteBuffer.order(ByteOrder.BIG_ENDIAN)
+ byteBuffer.getDouble
+ }
+
+ /**
+ * A decoder for IEEE-754 64 bit little endian floats
+ *
+ * @param bytes A byte array that represents the binary data
+ * @return The decoded value as a primitive (unboxed) double
+ */
+ def decodeDoubleL(bytes: Array[Byte]): Double = {
+ require(bytes.length == 8, "Input must be exactly 8 bytes for a 64-bit float")
+
+ val byteBuffer = ByteBuffer.wrap(bytes)
+ byteBuffer.order(ByteOrder.LITTLE_ENDIAN)
+ byteBuffer.getDouble
+ }
+
/** Decode IEEE754 single precision big endian encoded number. */
def decodeIeee754SingleBigEndian(bytes: Array[Byte]): java.lang.Float = {
try {
- floatB.decode(BitVector(bytes)).require.value
+ decodeFloatB(bytes)
} catch {
case NonFatal(_) => null
}
@@ -41,7 +90,7 @@ object FloatingPointDecoders {
/** Decode IEEE754 double precision big endian encoded number. */
def decodeIeee754DoubleBigEndian(bytes: Array[Byte]): java.lang.Double = {
try {
- doubleB.decode(BitVector(bytes)).require.value
+ decodeDoubleB(bytes)
} catch {
case NonFatal(_) => null
}
@@ -50,7 +99,7 @@ object FloatingPointDecoders {
/** Decode IEEE754 single precision little endian encoded number. */
def decodeIeee754SingleLittleEndian(bytes: Array[Byte]): java.lang.Float = {
try {
- floatL.decode(BitVector(bytes)).require.value
+ decodeFloatL(bytes)
} catch {
case NonFatal(_) => null
}
@@ -59,7 +108,7 @@ object FloatingPointDecoders {
/** Decode IEEE754 double precision little endian encoded number. */
def decodeIeee754DoubleLittleEndian(bytes: Array[Byte]): java.lang.Double = {
try {
- doubleL.decode(BitVector(bytes)).require.value
+ decodeDoubleL(bytes)
} catch {
case NonFatal(_) => null
}
diff --git a/pom.xml b/pom.xml
index 7a50dbb90..08be4d936 100644
--- a/pom.xml
+++ b/pom.xml
@@ -117,8 +117,7 @@
2.13.1
4.11.0
3.7.2
- 1.1.4
- 1.10.3
+ 1.11.10
1.7.25
@@ -227,15 +226,11 @@
jul-to-slf4j
${slf4j.version}
-
- org.scodec
- scodec-bits_${scala.compat.version}
- ${scodec_bits.version}
-
org.scodec
scodec-core_${scala.compat.version}
${scodec_core.version}
+ test
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index e67651c9b..cb7449204 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -72,14 +72,14 @@ object Dependencies {
val CobolParserDependencies: Seq[ModuleID] = Seq(
// compile
- "org.scodec" %% "scodec-core" % scodecCoreVersion excludeAll(ExclusionRule(organization = "org.scala-lang")),
"org.antlr" % "antlr4-runtime" % antlrValue,
"org.slf4j" % "slf4j-api" % slf4jVersion,
// test
- "org.scalatest" %% "scalatest" % scalatestVersion % Test,
- "org.mockito" % "mockito-core" % mockitoVersion % Test,
- "org.slf4j" % "slf4j-simple" % slf4jVersion % Test
+ "org.scalatest" %% "scalatest" % scalatestVersion % Test,
+ "org.mockito" % "mockito-core" % mockitoVersion % Test,
+ "org.scodec" %% "scodec-core" % scodecCoreVersion % Test,
+ "org.slf4j" % "slf4j-simple" % slf4jVersion % Test
)
val CobolParserShadedDependencies: Set[ModuleID] = Set(