diff --git a/README.md b/README.md index 7f80ccf1b..31c922d33 100644 --- a/README.md +++ b/README.md @@ -1626,6 +1626,7 @@ The output looks like this: | .option("ebcdic_code_page", "common") | Common | (Default) Only characters common across EBCDIC code pages are decoded. | | .option("ebcdic_code_page", "cp037") | EBCDIC 037 | Australia, Brazil, Canada, New Zealand, Portugal, South Africa, USA. | | .option("ebcdic_code_page", "cp273") | EBCDIC 273 | Germany, Austria. | +| .option("ebcdic_code_page", "cp275") | EBCDIC 275 | Brazil. | | .option("ebcdic_code_page", "cp277") | EBCDIC 277 | Denmark and Norway. | | .option("ebcdic_code_page", "cp278") | EBCDIC 278 | Finland and Sweden. | | .option("ebcdic_code_page", "cp300") | EBCDIC 300 | Double-byte code page with Japanese and Latin characters. | diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala index ad07b52ae..bcc470a1a 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala @@ -46,6 +46,7 @@ object CodePage extends Logging { case "cp037_extended" => new CodePage037Ext case "cp00300" => new CodePage300 // This is the same as cp300 case "cp273" => new CodePage273 + case "cp275" => new CodePage275 case "cp277" => new CodePage277 case "cp278" => new CodePage278 case "cp300" => new CodePage300 diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage275.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage275.scala new file mode 100644 index 000000000..67d07b7c1 --- /dev/null +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage275.scala @@ -0,0 +1,54 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed 
under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package za.co.absa.cobrix.cobol.parser.encoding.codepage + +/** + * EBCDIC code page 275 is used to represent characters of Brazil. + */ +class CodePage275 extends SingleByteCodePage(CodePage275.ebcdicToAsciiMapping) { + override def codePageShortName: String = "cp275" +} + +object CodePage275 { + val ebcdicToAsciiMapping: Array[Char] = { + import EbcdicNonPrintable._ + + /* This is the EBCDIC Code Page 275 to ASCII conversion table + from https://en.wikibooks.org/wiki/Character_Encodings/Code_Tables/EBCDIC/EBCDIC_275 */ + val ebcdic2ascii: Array[Char] = { + // Non-printable characters map used: http://www.pacsys.com/asciitab.htm + Array[Char]( + c00, c01, c02, c03, spc, c09, spc, del, spc, spc, spc, c0b, c0c, ccr, c0e, c0f, // 0 - 15 + c10, c11, c12, c13, spc, nel, c08, spc, c18, c19, spc, spc, c1c, c1d, c1e, c1f, // 16 - 31 + spc, spc, spc, spc, spc, clf, c17, c1b, spc, spc, spc, spc, spc, c05, c06, c07, // 32 - 47 + spc, spc, c16, spc, spc, spc, spc, c04, spc, spc, spc, spc, c14, c15, spc, c1a, // 48 - 63 + ' ', rsp, 'â', 'ä', 'à', 'á', '`', 'å', '¦', 'ñ', 'É', '.', '<', '(', '+', '!', // 64 - 79 + '&', '}', 'ê', 'ë', 'è', 'í', 'î', 'ï', 'ì', 'ß', '$', 'Ç', '*', ')', ';', '^', // 80 - 95 + '-', '/', 'Â', 'Ä', 'À', 'Á', '@', 'Å', ']', 'Ñ', 'ç', ',', '%', '_', '>', '?', // 96 - 111 + 'ø', '[', 'Ê', 'Ë', 'È', 'Í', 'Î', 'Ï', 'Ì', 'ã', ':', 'Õ', 'Ã', qts, '=', qtd, // 112 - 127 + 'Ø', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 
'h', 'i', '«', '»', 'ð', 'ý', 'þ', '±', // 128 - 143 + '°', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 'ª', 'º', 'æ', '¸', 'Æ', '¤', // 144 - 159 + 'µ', '~', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '¡', '¿', 'Ð', 'Ý', 'Þ', '®', // 160 - 175 + '¢', '£', '¥', '·', '©', '§', '¶', '¼', '½', '¾', '¬', '|', '¯', '¨', '´', '×', // 176 - 191 + 'õ', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', shy, 'ô', 'ö', 'ò', 'ó', '{', // 192 - 207 + 'é', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', '¹', 'û', 'ü', 'ù', 'ú', 'ÿ', // 208 - 223 + bsh, '÷', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '²', 'Ô', 'Ö', 'Ò', 'Ó', '#', // 224 - 239 + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '³', 'Û', 'Ü', 'Ù', 'Ú', spc) // 240 - 255 + } + ebcdic2ascii + } +} diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/StringDecodersSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/StringDecodersSpec.scala index c50df900d..d4c76cc81 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/StringDecodersSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/StringDecodersSpec.scala @@ -104,6 +104,17 @@ class StringDecodersSpec extends AnyWordSpec { assert(actual == expected) } + "decode a CP275 string special characters" in { + val expected = " æÄ!üÜ^Æö$ߢ§Øäéõø~åÖÕÃÅÉ\\¤] " + val bytes = Array(0x40, 0x9C, 0x63, 0x4F, 0xDC, 0xFC, 0x5F, 0x9E, 0xCC, 0x5A, 0x59, + 0xB0, 0xB5, 0x80, 0x43, 0xD0, 0xC0, 0x70, 0xA1, 0x47, 0xEC, 0x7B, 0x7C, 0x67, 0x4A, + 0xE0, 0x9F, 0x68, 0x40).map(_.toByte) + + val actual = decodeEbcdicString(bytes, KeepAll, new CodePage275, improvedNullDetection = false) + + assert(actual == expected) + } + "decode a CP277 string special characters" in { val expected = " {Ä!~Ü^[ö¤ß¢§@äåæ¦ü}ÖÆØ$#\\] " val bytes = Array(0x40, 0x9C, 0x63, 0x4F, 0xDC, 0xFC, 0x5F, 0x9E, 0xCC, 0x5A, 0x59, diff --git 
a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageSingleByteSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageSingleByteSpec.scala index 320e49328..a74595472 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageSingleByteSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageSingleByteSpec.scala @@ -47,6 +47,11 @@ class CodePageSingleByteSpec extends AnyFunSuite { assert(codePage.codePageShortName == "cp273") } + test("Ensure codepage 'cp275' gives the associated CodePage") { + val codePage = CodePage.getCodePageByName("cp275") + assert(codePage.codePageShortName == "cp275") + } + test("Ensure codepage 'cp277' gives the associated CodePage") { val codePage = CodePage.getCodePageByName("cp277") assert(codePage.codePageShortName == "cp277")