Skip to content

Commit

Permalink
#263 Add support for CP500 EBCDIC codepage.
Browse files Browse the repository at this point in the history
  • Loading branch information
yruslan committed Jan 16, 2024
1 parent c6609df commit 7b19d16
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ object CodePage extends Logging {
case "cp00300" => new CodePage300 // This is the same as cp300
case "cp273" => new CodePage273
case "cp300" => new CodePage300
case "cp500" => new CodePage500
case "cp838" => new CodePage838
case "cp870" => new CodePage870
case "cp875" => new CodePage875
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.cobrix.cobol.parser.encoding.codepage

/**
  * EBCDIC code page 500 (Belgium, Canada, Switzerland, International).
  *
  * The conversion table handed to the [[SingleByteCodePage]] base class is
  * [[CodePage500.ebcdicToAsciiMapping]], defined in the companion object.
  */
class CodePage500 extends SingleByteCodePage(CodePage500.ebcdicToAsciiMapping) {

  /** Short name identifying this code page. */
  override def codePageShortName: String = {
    "cp500"
  }
}

object CodePage500 {

  /**
    * Conversion table for EBCDIC code page 500.
    *
    * The table has 16 rows of 16 entries; the trailing comment on each row gives the range of
    * EBCDIC byte values (as array indices) that the row covers.
    *
    * Printable characters follow the code page 500 table published at
    * https://de.m.wikipedia.org/wiki/Extended_Binary_Coded_Decimal_Interchange_Code
    * Non-printable characters follow the map at http://www.pacsys.com/asciitab.htm and are
    * referenced through the constants imported from `EbcdicNonPrintable`.
    */
  val ebcdicToAsciiMapping: Array[Char] = {
    import EbcdicNonPrintable._

    Array[Char](
      c00, c01, c02, c03, spc, c09, spc, del, spc, spc, spc, c0b, c0c, ccr, c0e, c0f, // 0x00 - 0x0F (0 - 15)
      c10, c11, c12, c13, spc, nel, c08, spc, c18, c19, spc, spc, c1c, c1d, c1e, c1f, // 0x10 - 0x1F (16 - 31)
      spc, spc, spc, spc, spc, clf, c17, c1b, spc, spc, spc, spc, spc, c05, c06, c07, // 0x20 - 0x2F (32 - 47)
      spc, spc, c16, spc, spc, spc, spc, c04, spc, spc, spc, spc, c14, c15, spc, c1a, // 0x30 - 0x3F (48 - 63)
      ' ', rsp, 'â', 'ä', 'à', 'á', 'ã', 'å', 'ç', 'ñ', '[', '.', '<', '(', '+', '!', // 0x40 - 0x4F (64 - 79)
      '&', 'é', 'ê', 'ë', 'è', 'í', 'î', 'ï', 'ì', 'ß', ']', '$', '*', ')', ';', '^', // 0x50 - 0x5F (80 - 95)
      '-', '/', 'Â', 'Ä', 'À', 'Á', 'Ã', 'Å', 'Ç', 'Ñ', '¦', ',', '%', '_', '>', '?', // 0x60 - 0x6F (96 - 111)
      'ø', 'É', 'Ê', 'Ë', 'È', 'Í', 'Î', 'Ï', 'Ì', '`', ':', '#', '@', qts, '=', qtd, // 0x70 - 0x7F (112 - 127)
      'Ø', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', '«', '»', 'ð', 'ý', 'þ', '±', // 0x80 - 0x8F (128 - 143)
      '°', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 'ª', 'º', 'æ', '¸', 'Æ', '¤', // 0x90 - 0x9F (144 - 159)
      'µ', '~', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '¡', '¿', 'Ð', 'Ý', 'Þ', '®', // 0xA0 - 0xAF (160 - 175)
      '¢', '£', '¥', '·', '©', '§', '¶', '¼', '½', '¾', '¬', '|', '¯', '¨', '´', '×', // 0xB0 - 0xBF (176 - 191)
      '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', shy, 'ô', 'ö', 'ò', 'ó', 'õ', // 0xC0 - 0xCF (192 - 207)
      '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', '¹', 'û', 'ü', 'ù', 'ú', 'ÿ', // 0xD0 - 0xDF (208 - 223)
      bsh, '÷', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '²', 'Ô', 'Ö', 'Ò', 'Ó', 'Õ', // 0xE0 - 0xEF (224 - 239)
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '³', 'Û', 'Ü', 'Ù', 'Ú', spc  // 0xF0 - 0xFF (240 - 255)
    )
  }
}
60 changes: 60 additions & 0 deletions data/test9_expected/test9_cp500.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{"CURRENCY":"GBP","SIGNATURE":"S9276511","COMPANY_NAME_NP":"©û\u0012¯á3Z ÑT ¼Å8\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":36.88}
{"CURRENCY":"CZK","SIGNATURE":"S9276511","COMPANY_NAME_NP":"Ùçc¤ ³®\u001A Aº\u0014ëd\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":59.80}
{"CURRENCY":"USD","SIGNATURE":"S9276511","COMPANY_NAME_NP":"´\u0011#ñ#B \" :æ­à\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":767.31}
{"CURRENCY":"CHF","SIGNATURE":"S9276511","COMPANY_NAME_NP":"ZÁ\u0007Ì0z \u0003\u0015eCjú \u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":873.44}
{"CURRENCY":"CHF","SIGNATURE":"S9276511","COMPANY_NAME_NP":"8oÂÔàÒ\u0010\u001F«Ð· \"ç\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":39.71}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"¿\u001C |Æ»ýY)Ô%@ÌÒ\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":536.19}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"Ëù±l¥Ù Ë¢o _²<\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":346.57}
{"CURRENCY":"EUR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"é/Få©\u000BèÙªÎÓÖ¦\n\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":471.30}
{"CURRENCY":"GBP","SIGNATURE":"S9276511","COMPANY_NAME_NP":"\u001F¶ýB}óþ\fF#«RÇÙ\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":287.83}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"Ý ëºê6\u0016 %±Çì\u000E \u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":60112.00}
{"CURRENCY":"USD","SIGNATURE":"S9276511","COMPANY_NAME_NP":" ïêÖj% ®¯\\¶ X\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":539.59}
{"CURRENCY":"GBP","SIGNATURE":"S9276511","COMPANY_NAME_NP":"q- (â:ýÓRneê9\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":857.62}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"r\u0002\u0005Äjõ3ö~:?^Ùæ\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":400.08}
{"CURRENCY":"EUR","SIGNATURE":"S9276511","COMPANY_NAME_NP":" Ø \u0005Á(çñiÁ®?Gä\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":245.88}
{"CURRENCY":"CZK","SIGNATURE":"S9276511","COMPANY_NAME_NP":"ëÈ\u0004ºÞí eà%=\u001AÇý\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":416.49}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"»PBÅ,\u0015çmP ²s=r\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":538.66}
{"CURRENCY":"CZK","SIGNATURE":"S9276511","COMPANY_NAME_NP":"\u0004MËÃ>D\u0004w3ã5Y| \u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":26062.00}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"XeËøò)Zâäáæx\u001BW\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":467.90}
{"CURRENCY":"CZK","SIGNATURE":"S9276511","COMPANY_NAME_NP":" f×Ó7Ot R¾÷ß f\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":995.71}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":":|{q­`\u0010\u001Fì¾D\n\t\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":8569982.00}
{"CURRENCY":"CZK","SIGNATURE":"S9276511","COMPANY_NAME_NP":"Æŵ_Dé)FQz#?Äc\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":5620.54}
{"CURRENCY":"EUR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"ó³\b=Â\u0010\u000E\tø siñ\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":688.71}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"T7Æ h£o Þ\u0012[ûþª\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":477.11}
{"CURRENCY":"USD","SIGNATURE":"S9276511","COMPANY_NAME_NP":"z§\u0018\u001EPül\u0002ÆSì) 2\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":93.93}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"ãiJe\u0007ÖYV¨ ÷\u0017¾ \u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":533.47}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"@B\u0016Jÿû+q &·çé \u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":535.39}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"8 ±H\u0003õ.\nÅI1\u001FÒD\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":996.36}
{"CURRENCY":"CYN","SIGNATURE":"S9276511","COMPANY_NAME_NP":"@a¬<®#6¸\t?ýÔ\u001At\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":826.76}
{"CURRENCY":"CAD","SIGNATURE":"S9276511","COMPANY_NAME_NP":"Ûò(éì\nE \\\u0013L ¢z\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":39642.00}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"e\u0006\u0000 »:@@ \u001F\u0016±h»\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":9261333.00}
{"CURRENCY":"EUR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"Ç+n\u0013ßUã|9b¹oÀÒ\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":20.79}
{"CURRENCY":"CHF","SIGNATURE":"S9276511","COMPANY_NAME_NP":"\u000Eª¼»   @$;f¤Ü\u000E\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":961.32}
{"CURRENCY":"EUR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"Þºô°¾'Â\u0006§w¥ÞyE\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":150.26}
{"CURRENCY":"CAD","SIGNATURE":"S9276511","COMPANY_NAME_NP":":(ÖadùòǶ@üSð¯\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":951.71}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"¾\u001AW%SG\u0011Bö A5>\r\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":94.65}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"\u0001 u^xSQqaëja, \u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":880.76}
{"CURRENCY":"EUR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"ø¯É É; s>±- æ\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":767.09}
{"CURRENCY":"EUR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"u\u000E:0 \u0001Jä ûI\u0005¥à\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":996.09}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"°\u0019+äÏeºW¡èØ þ \u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":960.96}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"¹4>Vê P-9hA5\b\u001F\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":891.81}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":" Xi:\n '\u0015% \u0019Éé\b\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":827.68}
{"CURRENCY":"CAD","SIGNATURE":"S9276511","COMPANY_NAME_NP":"¸É)ìz{ÝmáË S< \u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":347.75}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"ù Â\u001E¯·DæÙÙ\u0010@2W\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":122.36}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"\"2\u000F¸Dt\u0013p ÓMÜk4\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":959.54}
{"CURRENCY":"USD","SIGNATURE":"S9276511","COMPANY_NAME_NP":"]áÖ\u0014ñ°­ý¬iw h5\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":868.91}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"ì ÉòúÏ\u0012\u001D§\u001C çq \u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":413.20}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"s õ WpÆ «\u0007ÀýQ\u0012\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":5125370.00}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"\n\u000Em !â0ùûì[AX®\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":666.98}
{"CURRENCY":"CHF","SIGNATURE":"S9276511","COMPANY_NAME_NP":"\u0014öråt]\u0001$ï3 ó6\u0006\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":442.62}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":" Gh\u000B+ ,Ú$·…Ð\u0000 \u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":45558.00}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"k+å¬ hE\u0006¢9M\u0007é\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":405.18}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"3Þ{ Ó\b&ÿ n =ÎÂ\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":37.25}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":" \rì\u0010+h\u0002Äò¸=Tª \u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":668.85}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":" ×Ì\u001A©ÔÖûæ…G5\u0003×\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":78475.00}
{"CURRENCY":"GBP","SIGNATURE":"S9276511","COMPANY_NAME_NP":"nP\u0012I}f eäJ£õ¢\u0011\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":809.40}
{"CURRENCY":"USD","SIGNATURE":"S9276511","COMPANY_NAME_NP":" Õ3.½W«j/6²\u001AãÈ\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":1,"AMOUNT":823.73}
{"CURRENCY":"ZAR","SIGNATURE":"S9276511","COMPANY_NAME_NP":"rKhh·Eþ\u001E¶¨\u000BqÃ\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":447.38}
{"CURRENCY":"GBP","SIGNATURE":"S9276511","COMPANY_NAME_NP":"Ï~máwxQu-\f?\u0000+ü\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":815.87}
{"CURRENCY":"CYN","SIGNATURE":"S9276511","COMPANY_NAME_NP":"\u0002Æf\"¨' ×\u000Foä\"ÁÞ\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":3892137.00}
{"CURRENCY":"USD","SIGNATURE":"S9276511","COMPANY_NAME_NP":"%\u0011hÝ6 \u0012TÑ0+p\u0000","COMPANY_ID":"00000000\u0000\u0000","WEALTH_QFY":0,"AMOUNT":300.76}
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,18 @@ class Test9CodePages extends AnyFunSuite with SparkTestBase {
"string_trimming_policy" -> "none" :: Nil)
}

test(s"Integration test on $exampleName code page CP273 extended") {
test(s"Integration test on $exampleName code page CP273") {
runTest("_cp273",
"ebcdic_code_page" -> "cp273" ::
"string_trimming_policy" -> "none" :: Nil)
}

// Integration test for the CP500 EBCDIC code page: runs the shared `runTest` helper with the
// "ebcdic_code_page" option set to "cp500" and string trimming disabled ("none").
// NOTE(review): the "_cp500" suffix presumably selects the expected-output fixture
// (e.g. data/test9_expected/test9_cp500.txt) - confirm against `runTest`.
test(s"Integration test on $exampleName code page CP500") {
runTest("_cp500",
"ebcdic_code_page" -> "cp500" ::
"string_trimming_policy" -> "none" :: Nil)
}

test(s"Integration test on $exampleName custom code page") {
runTest("_cp_custom",
"ebcdic_code_page_class" -> "za.co.absa.cobrix.spark.cobol.source.utils.CustomCodePage" ::
Expand Down

0 comments on commit 7b19d16

Please sign in to comment.