Skip to content

Commit

Permalink
#258 Remove dependency on scodec.
Browse files Browse the repository at this point in the history
  • Loading branch information
yruslan committed Jan 8, 2025
1 parent e1ebd95 commit 3346e8d
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 96 deletions.
13 changes: 6 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -233,23 +233,22 @@ of the dependencies.

#### Getting all Cobrix dependencies

Cobrix's `spark-cobol` data source depends on the COBOL parser that is a part of Cobrix itself and on `scodec` libraries
to decode various binary formats.
Cobrix's `spark-cobol` data source depends on the COBOL parser that is a part of Cobrix itself.

The jars that you need to get are:

* spark-cobol_2.12-2.7.10.jar
* cobol-parser_2.12-2.7.10.jar
* scodec-core_2.12-1.10.3.jar
* scodec-bits_2.12-1.1.4.jar
* spark-cobol_2.12-2.8.0.jar
* cobol-parser_2.12-2.8.0.jar

> Versions older than 2.8.0 also need `scodec-core_2.12-1.10.3.jar` and `scodec-bits_2.12-1.1.4.jar`.
> Versions older than 2.7.1 also need `antlr4-runtime-4.8.jar`.
After that you can specify these jars in `spark-shell` command line. Here is an example:
```
$ spark-shell --packages za.co.absa.cobrix:spark-cobol_2.12:2.7.10
or
$ spark-shell --master yarn --deploy-mode client --driver-cores 4 --driver-memory 4G --jars spark-cobol_2.12-2.7.10.jar,cobol-parser_2.12-2.7.10.jar,scodec-core_2.12-1.10.3.jar,scodec-bits_2.12-1.1.4.jar
$ spark-shell --master yarn --deploy-mode client --driver-cores 4 --driver-memory 4G --jars spark-cobol_2.12-2.8.0.jar,cobol-parser_2.12-2.8.0.jar
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Expand Down
4 changes: 0 additions & 4 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -151,10 +151,6 @@ lazy val assemblySettings = Seq(
assembly / assemblyShadeRules:= Seq(
// Spark may rely on a different version of ANTLR runtime. Renaming the package helps avoid the binary incompatibility
ShadeRule.rename("org.antlr.**" -> "za.co.absa.cobrix.cobol.parser.shaded.org.antlr.@1").inAll,
// Shading all 3rd party libraries used by 'spark-cobol' in order to avoid binary conflicts.
ShadeRule.rename("macrocompat.**" -> "za.co.absa.cobrix.spark.cobol.shaded.macrocompat.@1").inAll,
ShadeRule.rename("scodec.**" -> "za.co.absa.cobrix.spark.cobol.shaded.scodec.@1").inAll,
ShadeRule.rename("shapeless.**" -> "za.co.absa.cobrix.spark.cobol.shaded.shapeless.@1").inAll,
// The SLF4j API and implementation are provided by Spark
ShadeRule.zap("org.slf4j.**").inAll
),
Expand Down
11 changes: 6 additions & 5 deletions cobol-parser/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,6 @@
<packaging>jar</packaging>

<dependencies>
<!-- binary codecs -->
<dependency>
<groupId>org.scodec</groupId>
<artifactId>scodec-core_${scala.compat.version}</artifactId>
</dependency>
<!-- parser -->
<dependency>
<groupId>org.antlr</groupId>
Expand All @@ -46,6 +41,12 @@
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<!-- binary codecs -->
<dependency>
<groupId>org.scodec</groupId>
<artifactId>scodec-core_${scala.compat.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,13 @@

package za.co.absa.cobrix.cobol.parser.decoders

import scodec.Codec
import scodec.bits.BitVector
import za.co.absa.cobrix.cobol.parser.ast.datatype._
import za.co.absa.cobrix.cobol.parser.common.Constants
import za.co.absa.cobrix.cobol.parser.encoding.{EBCDIC, Encoding}

import scala.util.control.NonFatal

/** Utilities for decoding Cobol binary data files **/
//noinspection RedundantBlock
object BinaryUtils {

lazy val floatB: Codec[Float] = scodec.codecs.float
lazy val floatL: Codec[Float] = scodec.codecs.floatL
lazy val doubleB: Codec[Double] = scodec.codecs.double
lazy val doubleL: Codec[Double] = scodec.codecs.doubleL

/**
* This is the EBCDIC to ASCII conversion table. This is an "invariant" subset of EBCDIC code pages.
* For full EBCDIC code pages support please use [[za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePage]]
Expand Down Expand Up @@ -105,25 +95,6 @@ object BinaryUtils {
/** Convert an ASCII character to EBCDIC */
def asciiToEbcdic(char: Char): Byte = ascii2ebcdic(char.toByte)

/** Get the bit count of a cobol data type
*
* @param codec EBCDIC / ASCII
* @param comp A type of compact stirage
* @param precision The precision (the number of digits) of the type
* @return
*/
def getBitCount(codec: Codec[_ <: AnyVal], comp: Option[Int], precision: Int): Int = {
comp match {
case Some(value) =>
value match {
case compact if compact == 3 =>
(precision + 1) * codec.sizeBound.lowerBound.toInt //bcd
case _ => codec.sizeBound.lowerBound.toInt // bin/float/floatL
}
case None => precision * codec.sizeBound.lowerBound.toInt
}
}

def getBytesCount(compression: Option[Usage], precision: Int, isSigned: Boolean, isExplicitDecimalPt: Boolean, isSignSeparate: Boolean): Int = {
import Constants._
val isRealSigned = if (isSignSeparate) false else isSigned
Expand Down Expand Up @@ -273,32 +244,4 @@ object BinaryUtils {
}
addDecimalPoint(value.toString, scale, scaleFactor)
}

/**
* A decoder for IEEE-754 big endian floats
*
* @param bytes A byte array that represents the binary data
* @return A boxed float
*/
def decodeFloat(bytes: Array[Byte]): java.lang.Float = {
try {
floatB.decode(BitVector(bytes)).require.value
} catch {
case NonFatal(_) => null
}
}

/**
* A decoder for IEEE-754 big endian doubles
*
* @param bytes A byte array that represents the binary data
* @return A boxed double
*/
def decodeDouble(bytes: Array[Byte]): java.lang.Double = {
try {
doubleB.decode(BitVector(bytes)).require.value
} catch {
case NonFatal(_) => null
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,72 @@

package za.co.absa.cobrix.cobol.parser.decoders

import scodec.Codec
import scodec.bits.BitVector

import java.nio.{ByteBuffer, ByteOrder}
import scala.util.control.NonFatal

object FloatingPointDecoders {
private val floatB: Codec[Float] = scodec.codecs.float
private val floatL: Codec[Float] = scodec.codecs.floatL
private val doubleB: Codec[Double] = scodec.codecs.double
private val doubleL: Codec[Double] = scodec.codecs.doubleL

private val BIT_COUNT_MAGIC = 0x000055AFL

/**
 * Decodes an IEEE-754 single precision (32-bit) number stored in big-endian byte order.
 *
 * @param bytes The raw binary data; must contain exactly 4 bytes
 * @return The decoded value as a primitive `Float`
 * @throws IllegalArgumentException if `bytes` is not exactly 4 bytes long
 */
def decodeFloatB(bytes: Array[Byte]): Float = {
  require(bytes.length == 4, "Input must be exactly 4 bytes for a 32-bit float")

  // The byte order is set explicitly so the intent does not rely on ByteBuffer's default.
  ByteBuffer.wrap(bytes).order(ByteOrder.BIG_ENDIAN).getFloat
}

/**
 * Decodes an IEEE-754 single precision (32-bit) number stored in little-endian byte order.
 *
 * @param bytes The raw binary data; must contain exactly 4 bytes
 * @return The decoded value as a primitive `Float`
 * @throws IllegalArgumentException if `bytes` is not exactly 4 bytes long
 */
def decodeFloatL(bytes: Array[Byte]): Float = {
  require(bytes.length == 4, "Input must be exactly 4 bytes for a 32-bit float")

  // The least significant byte comes first in the input.
  ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).getFloat
}

/**
 * Decodes an IEEE-754 double precision (64-bit) number stored in big-endian byte order.
 *
 * @param bytes The raw binary data; must contain exactly 8 bytes
 * @return The decoded value as a primitive `Double`
 * @throws IllegalArgumentException if `bytes` is not exactly 8 bytes long
 */
def decodeDoubleB(bytes: Array[Byte]): Double = {
  require(bytes.length == 8, "Input must be exactly 8 bytes for a 64-bit float")

  // The byte order is set explicitly so the intent does not rely on ByteBuffer's default.
  ByteBuffer.wrap(bytes).order(ByteOrder.BIG_ENDIAN).getDouble
}

/**
 * Decodes an IEEE-754 double precision (64-bit) number stored in little-endian byte order.
 *
 * @param bytes The raw binary data; must contain exactly 8 bytes
 * @return The decoded value as a primitive `Double`
 * @throws IllegalArgumentException if `bytes` is not exactly 8 bytes long
 */
def decodeDoubleL(bytes: Array[Byte]): Double = {
  require(bytes.length == 8, "Input must be exactly 8 bytes for a 64-bit float")

  // The least significant byte comes first in the input.
  ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).getDouble
}

/** Decode IEEE754 single precision big endian encoded number. */
def decodeIeee754SingleBigEndian(bytes: Array[Byte]): java.lang.Float = {
try {
floatB.decode(BitVector(bytes)).require.value
decodeFloatB(bytes)
} catch {
case NonFatal(_) => null
}
Expand All @@ -41,7 +90,7 @@ object FloatingPointDecoders {
/** Decode IEEE754 double precision big endian encoded number. */
def decodeIeee754DoubleBigEndian(bytes: Array[Byte]): java.lang.Double = {
try {
doubleB.decode(BitVector(bytes)).require.value
decodeDoubleB(bytes)
} catch {
case NonFatal(_) => null
}
Expand All @@ -50,7 +99,7 @@ object FloatingPointDecoders {
/** Decode IEEE754 single precision little endian encoded number. */
def decodeIeee754SingleLittleEndian(bytes: Array[Byte]): java.lang.Float = {
try {
floatL.decode(BitVector(bytes)).require.value
decodeFloatL(bytes)
} catch {
case NonFatal(_) => null
}
Expand All @@ -59,7 +108,7 @@ object FloatingPointDecoders {
/** Decode IEEE754 double precision little endian encoded number. */
def decodeIeee754DoubleLittleEndian(bytes: Array[Byte]): java.lang.Double = {
try {
doubleL.decode(BitVector(bytes)).require.value
decodeDoubleL(bytes)
} catch {
case NonFatal(_) => null
}
Expand Down
9 changes: 2 additions & 7 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,7 @@
<jackson.version>2.13.1</jackson.version>
<mockito.version>4.11.0</mockito.version>
<scala_logging.version>3.7.2</scala_logging.version>
<scodec_bits.version>1.1.4</scodec_bits.version>
<scodec_core.version>1.10.3</scodec_core.version>
<scodec_core.version>1.11.10</scodec_core.version>
<slf4j.version>1.7.25</slf4j.version>
</properties>

Expand Down Expand Up @@ -227,15 +226,11 @@
<artifactId>jul-to-slf4j</artifactId>
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>org.scodec</groupId>
<artifactId>scodec-bits_${scala.compat.version}</artifactId>
<version>${scodec_bits.version}</version>
</dependency>
<dependency>
<groupId>org.scodec</groupId>
<artifactId>scodec-core_${scala.compat.version}</artifactId>
<version>${scodec_core.version}</version>
<scope>test</scope>
</dependency>

<!-- Test scope dependencies -->
Expand Down
8 changes: 4 additions & 4 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,14 @@ object Dependencies {

val CobolParserDependencies: Seq[ModuleID] = Seq(
// compile
"org.scodec" %% "scodec-core" % scodecCoreVersion excludeAll(ExclusionRule(organization = "org.scala-lang")),
"org.antlr" % "antlr4-runtime" % antlrValue,
"org.slf4j" % "slf4j-api" % slf4jVersion,

// test
"org.scalatest" %% "scalatest" % scalatestVersion % Test,
"org.mockito" % "mockito-core" % mockitoVersion % Test,
"org.slf4j" % "slf4j-simple" % slf4jVersion % Test
"org.scalatest" %% "scalatest" % scalatestVersion % Test,
"org.mockito" % "mockito-core" % mockitoVersion % Test,
"org.scodec" %% "scodec-core" % scodecCoreVersion % Test,
"org.slf4j" % "slf4j-simple" % slf4jVersion % Test
)

val CobolParserShadedDependencies: Set[ModuleID] = Set(
Expand Down

0 comments on commit 3346e8d

Please sign in to comment.