From cdb1b09eb415a5e18ccfde939b6266dfbf02f14f Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Mon, 23 May 2022 07:53:42 +0300 Subject: [PATCH 001/113] Added jenkinsfile --- .ci/Dockerfile.build | 6 ++++++ Jenkinsfile | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 .ci/Dockerfile.build create mode 100644 Jenkinsfile diff --git a/.ci/Dockerfile.build b/.ci/Dockerfile.build new file mode 100644 index 0000000..7bcf6c5 --- /dev/null +++ b/.ci/Dockerfile.build @@ -0,0 +1,6 @@ +# Build set_ambient +FROM python:3.7-alpine + +ENV LC_ALL=C + +RUN pip install databricks-cli requests \ No newline at end of file diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 0000000..0ea6850 --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,18 @@ + +pipeline { + agent { + dockerfile { + filename '.ci/Dockerfile.build' + } + } + + stages { + stage('Copy notebooks to Databricks') { + steps { + script { + databricks workspace import_dir -o "./databricks/python" "/Shared/Spark OCR/tests/" --profile mykola + } + } + } + } +} From f063637d58e9fb066102b5782a10837c408c5432 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Mon, 23 May 2022 18:13:55 +0300 Subject: [PATCH 002/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 0ea6850..430a487 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -10,7 +10,7 @@ pipeline { stage('Copy notebooks to Databricks') { steps { script { - databricks workspace import_dir -o "./databricks/python" "/Shared/Spark OCR/tests/" --profile mykola + sh('databricks workspace import_dir -o "./databricks/python" "/Shared/Spark OCR/tests/"') } } } From 8f0b3ee18e9dec32219658029e2c8447389b6fcf Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Mon, 23 May 2022 20:56:54 +0300 Subject: [PATCH 003/113] Updated jenkinsfile --- Jenkinsfile | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 430a487..aaa5b12 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,4 +1,6 @@ +def DBTOKEN = "DATABRICKS_TOKEN" + pipeline { agent { dockerfile { @@ -7,6 +9,22 @@ pipeline { } stages { + stage('Setup') { + withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { + sh """#!/bin/bash + # Configure Databricks CLI for deployment + echo "${DBURL} + $TOKEN" | databricks configure --token + + # Configure Databricks Connect for testing + echo "${DBURL} + $TOKEN + ${CLUSTERID} + 0 + 15001" | databricks-connect configure + """ + } + } stage('Copy notebooks to Databricks') { steps { script { From 918de40578b9e6337aa0e4e554d3e72d180f13dc Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Mon, 23 May 2022 20:58:57 +0300 Subject: [PATCH 004/113] Updated jenkinsfile --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index aaa5b12..b6c0224 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,3 +1,4 @@ +@Library('jenkinslib')_ def DBTOKEN = "DATABRICKS_TOKEN" From e4dd7e0f37fdaa7f389c0b8f03cfc91d4d2e2298 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Mon, 23 May 2022 21:05:48 +0300 Subject: [PATCH 005/113] Updated jenkinsfile --- Jenkinsfile | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index b6c0224..466aff2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -11,19 +11,21 @@ pipeline { stages { stage('Setup') { - withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh """#!/bin/bash - # Configure Databricks CLI for deployment - echo "${DBURL} - $TOKEN" | databricks configure --token + script { + withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { + sh """#!/bin/bash + # Configure Databricks CLI for deployment + echo "${DBURL} + $TOKEN" | databricks configure --token - # Configure Databricks Connect for testing - echo "${DBURL} - $TOKEN - ${CLUSTERID} - 0 - 15001" | databricks-connect configure - """ + # Configure Databricks Connect for testing + echo "${DBURL} + $TOKEN + ${CLUSTERID} + 0 + 15001" | databricks-connect configure + """ + } } } stage('Copy notebooks to Databricks') { From 09a7171b3edba7fb65266c7d481f63beff63c723 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Mon, 23 May 2022 21:08:49 +0300 Subject: [PATCH 006/113] Updated jenkinsfile --- Jenkinsfile | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 466aff2..cc161bb 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -11,20 +11,22 @@ pipeline { stages { stage('Setup') { - script { - withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh """#!/bin/bash - # Configure Databricks CLI for deployment - echo "${DBURL} - $TOKEN" | databricks configure --token + steps { + script { + withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { + sh """#!/bin/bash + # Configure Databricks CLI for deployment + echo "${DBURL} + $TOKEN" | databricks configure --token - # Configure Databricks Connect for testing - echo "${DBURL} - $TOKEN - ${CLUSTERID} - 0 - 15001" | databricks-connect configure - """ + # Configure Databricks Connect for testing + echo "${DBURL} + $TOKEN + ${CLUSTERID} + 0 + 15001" | databricks-connect configure + """ + } } } } From b3d93a197cf33a251a45aef13d18be0350e5ac0b Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 24 May 2022 08:05:41 +0300 Subject: [PATCH 007/113] Updated jenkinsfile --- Jenkinsfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index cc161bb..3e7adae 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,6 +1,8 @@ @Library('jenkinslib')_ def DBTOKEN = "DATABRICKS_TOKEN" +def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com/" +def CLUSTERID = "0428-112519-vaxgi8gx" pipeline { agent { From 917788cff2f66bdeb05301edcbd3bdc787caecf5 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 24 May 2022 08:13:22 +0300 Subject: [PATCH 008/113] Updated jenkinsfile --- Jenkinsfile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 3e7adae..66ddbb2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -10,6 +10,9 @@ pipeline { filename '.ci/Dockerfile.build' } } + environment { + DBTOKEN_CREDS = credentials(DBTOKEN) + } stages { stage('Setup') { @@ -19,11 +22,11 @@ pipeline { sh """#!/bin/bash # Configure Databricks CLI for deployment echo "${DBURL} - $TOKEN" | databricks configure --token + $DBTOKEN_CREDS" | databricks configure --token # Configure Databricks Connect for testing echo "${DBURL} - $TOKEN + $DBTOKEN_CREDS ${CLUSTERID} 0 15001" | databricks-connect configure From 9998301bba2b4546a2cbe9dc492ff16783858dbb Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 24 May 2022 08:15:15 +0300 Subject: [PATCH 009/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 66ddbb2..3341d27 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -11,7 +11,7 @@ pipeline { } } environment { - DBTOKEN_CREDS = credentials(DBTOKEN) + DBTOKEN_CREDS = credentials("DATABRICKS_TOKEN") } stages { From 3535cdb2c97532b627ba2888ee84cc83e0062d83 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 24 May 2022 08:23:31 +0300 Subject: [PATCH 010/113] Updated jenkinsfile --- Jenkinsfile | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 3341d27..e9fac14 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -10,16 +10,12 @@ pipeline { filename '.ci/Dockerfile.build' } } - environment { - DBTOKEN_CREDS = credentials("DATABRICKS_TOKEN") - } - stages { stage('Setup') { steps { script { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh """#!/bin/bash + sh '''#!/bin/bash # Configure Databricks CLI for deployment echo "${DBURL} $DBTOKEN_CREDS" | databricks configure --token @@ -30,7 +26,7 @@ pipeline { ${CLUSTERID} 0 15001" | databricks-connect configure - """ + ''' } } } From 9953f9a603e99a6b0b7b6957e655fa886074acf0 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 24 May 2022 08:27:31 +0300 Subject: [PATCH 011/113] Updated jenkinsfile --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index e9fac14..cd1f448 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -15,7 +15,7 @@ pipeline { steps { script { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh '''#!/bin/bash + sh(script: '''#!/bin/bash # Configure Databricks CLI for deployment echo "${DBURL} $DBTOKEN_CREDS" | databricks configure --token @@ -26,7 +26,7 @@ pipeline { ${CLUSTERID} 0 15001" | databricks-connect configure - ''' + ''') } } } From 9e15fba540268125f4a136fe3f60f140ff49af58 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 24 May 2022 08:29:31 +0300 Subject: [PATCH 012/113] Updated jenkinsfile --- Jenkinsfile | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index cd1f448..a45dc97 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -19,13 +19,6 @@ pipeline { # Configure Databricks CLI for deployment echo "${DBURL} $DBTOKEN_CREDS" | databricks configure --token - - # Configure Databricks Connect for testing - echo "${DBURL} - $DBTOKEN_CREDS - ${CLUSTERID} - 0 - 15001" | databricks-connect configure ''') } } From fd5cb03e9dbcad4e0613999606c093da3cabb39c Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 24 May 2022 08:57:05 +0300 Subject: [PATCH 013/113] Updated jenkinsfile --- Jenkinsfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index a45dc97..bae4c94 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -17,8 +17,7 @@ pipeline { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { sh(script: '''#!/bin/bash # Configure Databricks CLI for deployment - echo "${DBURL} - $DBTOKEN_CREDS" | databricks configure --token + echo "${DBURL} $DBTOKEN_CREDS" ''') } } From 3c970d24c596aeccb58c941adedebad427f2b743 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 24 May 2022 19:01:49 +0300 Subject: [PATCH 014/113] Updated jenkinsfile --- Jenkinsfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index bae4c94..69564fc 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -15,9 +15,7 @@ pipeline { steps { script { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh(script: '''#!/bin/bash - # Configure Databricks CLI for deployment - echo "${DBURL} $DBTOKEN_CREDS" + sh(script: '''echo "test" ''') } } From 2e101c6ff765365de97d12c8cf78b6f2291fcae5 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 24 May 2022 19:04:44 +0300 Subject: [PATCH 015/113] Updated jenkinsfile --- Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 69564fc..85ac534 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -15,7 +15,8 @@ pipeline { steps { script { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh(script: '''echo "test" + sh(script: '''echo "${DBURL} +$DBTOKEN_CREDS" | databricks configure --token ''') } } From 8793722648766576663472976d647a9c74420c5a Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 24 May 2022 19:09:54 +0300 Subject: [PATCH 016/113] Updated jenkinsfile --- Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 85ac534..df5bd95 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -15,7 +15,8 @@ pipeline { steps { script { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh(script: '''echo "${DBURL} + sh(script: '''#!/bin/bash +echo "${DBURL} $DBTOKEN_CREDS" | databricks configure --token ''') } From 8a0932daea74c3fb295953db9fda31765ccb6d7c Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 24 May 2022 19:13:03 +0300 Subject: [PATCH 017/113] Updated jenkinsfile --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index df5bd95..177fa3f 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -15,9 +15,9 @@ pipeline { steps { script { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh(script: '''#!/bin/bash + sh(script: ''' echo "${DBURL} -$DBTOKEN_CREDS" | databricks configure --token +$DBTOKEN_CREDS" > databricks configure --token ''') } } From 792c604533d26ffd1354b28c41b0175079972f42 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 24 May 2022 19:36:08 +0300 Subject: [PATCH 018/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 177fa3f..bdd8be0 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -17,7 +17,7 @@ pipeline { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { sh(script: ''' echo "${DBURL} -$DBTOKEN_CREDS" > databricks configure --token +$TOKEN" | databricks configure --token ''') } } From 971ea63c2fea7a660c278ff7f6ed9b5b22abbe90 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 24 May 2022 19:46:54 +0300 Subject: [PATCH 019/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index bdd8be0..fd3d516 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -16,7 +16,7 @@ pipeline { script { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { sh(script: ''' -echo "${DBURL} +echo "$DBURL $TOKEN" | databricks configure --token ''') } From 7675238e44a0a2f62e72dc45aa76c9cfb4ca8468 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 24 May 2022 19:48:51 +0300 Subject: [PATCH 020/113] Updated jenkinsfile --- Jenkinsfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index fd3d516..42880fd 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -15,10 +15,9 @@ pipeline { steps { script { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh(script: ''' + sh(script: '''#!/bin/bash echo "$DBURL -$TOKEN" | databricks configure --token - ''') +$TOKEN" | databricks configure --token''') } } } From 8e407bb43ddd31f9a22b082e6404d03801a4f6d2 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 24 May 2022 19:55:22 +0300 Subject: [PATCH 021/113] Updated jenkinsfile --- Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 42880fd..b6f4f2d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -15,9 +15,10 @@ pipeline { steps { script { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh(script: '''#!/bin/bash + def output = sh(returnStdout: true, script: '''#!/bin/bash echo "$DBURL $TOKEN" | databricks configure --token''') + echo $output } } } From 6c70af86b71bef2a27f159823111d6ce6a7f6ba6 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 25 May 2022 19:38:21 +0300 Subject: [PATCH 022/113] Updated jenkinsfile --- Jenkinsfile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index b6f4f2d..a8158f4 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -15,10 +15,9 @@ pipeline { steps { script { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - def output = sh(returnStdout: true, script: '''#!/bin/bash -echo "$DBURL -$TOKEN" | databricks configure --token''') - echo $output + sh(script: '''#!/bin/bash + echo "$TOKEN" > secret.txt + databricks configure --token-file secret.txt --host $DBURL''') } } } From e9bfb609cfca9b1827a320c28cb3d40760145abf Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 25 May 2022 19:41:20 +0300 Subject: [PATCH 023/113] Updated jenkinsfile --- Jenkinsfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index a8158f4..e05bf78 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -15,9 +15,9 @@ pipeline { steps { script { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh(script: '''#!/bin/bash - echo "$TOKEN" > secret.txt - databricks configure --token-file secret.txt --host $DBURL''') + sh('''#!/bin/bash + echo "${TOKEN}" > secret.txt + databricks configure --token-file secret.txt --host ${DBURL}''') } } } From 6e322a65b1f11d94327f54cc4e4fd297a3414b2e Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 25 May 2022 19:42:30 +0300 Subject: [PATCH 024/113] Updated jenkinsfile --- Jenkinsfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index e05bf78..3c2527b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -15,9 +15,8 @@ pipeline { steps { script { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh('''#!/bin/bash - echo "${TOKEN}" > secret.txt - databricks configure --token-file secret.txt --host ${DBURL}''') + sh('echo "${TOKEN}" > secret.txt') + sh('databricks configure --token-file secret.txt --host ${DBURL}') } } } From 0ab14dcba4f56343bf80994c5110812164451188 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 25 May 2022 19:44:40 +0300 Subject: [PATCH 025/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 3c2527b..5ebb55a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -16,7 +16,7 @@ pipeline { script { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { sh('echo "${TOKEN}" > secret.txt') - sh('databricks configure --token-file secret.txt --host ${DBURL}') + sh("databricks configure --token-file secret.txt --host ${DBURL}") } } } From 5a91bb21f99fc68ec1c90bce25087a8b91d2f8dc Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 25 May 2022 19:54:59 +0300 Subject: [PATCH 026/113] Updated jenkinsfile --- Jenkinsfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 5ebb55a..e0258bc 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -10,6 +10,9 @@ pipeline { filename '.ci/Dockerfile.build' } } + environment { + DATABRICKS_CONFIG_FILE = ".databricks.cfg" + } stages { stage('Setup') { steps { From 09fd219af3e9740a698d42d74050d1879aa55b4c Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 26 May 2022 17:33:34 +0300 Subject: [PATCH 027/113] Updated Jenkinsfile --- Jenkinsfile | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index e0258bc..4b2349c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,8 +1,13 @@ @Library('jenkinslib')_ def DBTOKEN = "DATABRICKS_TOKEN" -def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com/" +def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com" def CLUSTERID = "0428-112519-vaxgi8gx" +def SCRIPTPATH = "./.ci" +def NOTEBOOKPATH = "./databricks/python" +def WORKSPACEPATH = "/Shared/Spark OCR/tests" +def OUTFILEPATH = "." +def TESTRESULTPATH = "." pipeline { agent { @@ -27,9 +32,29 @@ pipeline { stage('Copy notebooks to Databricks') { steps { script { - sh('databricks workspace import_dir -o "./databricks/python" "/Shared/Spark OCR/tests/"') + sh('databricks workspace import_dir -o "${NOTEBOOKPATH}" "${WORKSPACEPATH}"') } } } + stage('Run Notebook Tests') { + withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { + sh '''python3 ${SCRIPTPATH}/executenotebook.py --workspace=${DBURL}\ + --token=$TOKEN\ + --clusterid=${CLUSTERID}\ + --localpath=${NOTEBOOKPATH}\ + --workspacepath=${WORKSPACEPATH}\ + --outfilepath=${OUTFILEPATH} + ''' + sh '''sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py + python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py || true + ''' + } + stage('Report Test Results') { + sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\; + touch ${TESTRESULTPATH}/TEST-*.xml + """ + junit "**/reports/junit/*.xml" + } + } } } From c1e440aacebab030176fb4ae67abf80c65f6fc75 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 26 May 2022 17:34:52 +0300 Subject: [PATCH 028/113] Updated Jenkinsfile --- .ci/evaluatenotebookruns.py | 43 +++++++++++++++ .ci/executenotebook.py | 106 ++++++++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+) create mode 100644 .ci/evaluatenotebookruns.py create mode 100644 .ci/executenotebook.py diff --git a/.ci/evaluatenotebookruns.py b/.ci/evaluatenotebookruns.py new file mode 100644 index 0000000..3169bb2 --- /dev/null +++ b/.ci/evaluatenotebookruns.py @@ -0,0 +1,43 @@ +# evaluatenotebookruns.py +import unittest +import json +import glob +import os + +class TestJobOutput(unittest.TestCase): + + test_output_path = './tests/res' + + # def test_performance(self): + # path = self.test_output_path + # statuses = [] + # + # for filename in glob.glob(os.path.join(path, '*.json')): + # print('Evaluating: ' + filename) + # data = json.load(open(filename)) + # duration = data['execution_duration'] + # if duration > 100000: + # status = 'FAILED' + # else: + # status = 'SUCCESS' + # + # statuses.append(status) + # + # self.assertFalse('FAILED' in statuses) + + + def test_job_run(self): + path = self.test_output_path + statuses = [] + + + for filename in glob.glob(os.path.join(path, '*.json')): + print('Evaluating: ' + filename) + data = json.load(open(filename)) + status = data['state']['result_state'] + statuses.append(status) + + self.assertFalse('FAILED' in statuses) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/.ci/executenotebook.py b/.ci/executenotebook.py new file mode 100644 index 0000000..619a139 --- /dev/null +++ b/.ci/executenotebook.py @@ -0,0 +1,106 @@ +# executenotebook.py +#!/usr/bin/python3 +import json +import requests +import os +import sys +import getopt +import time + + +def main(): + workspace = '' + token = '' + clusterid = '' + localpath = '' + workspacepath = '' + outfilepath = '' + + try: + opts, args = getopt.getopt(sys.argv[1:], 'hs:t:c:lwo', + ['workspace=', 'token=', 'clusterid=', 'localpath=', 'workspacepath=', 'outfilepath=']) + except getopt.GetoptError: + print( + 'executenotebook.py -s -t -c -l -w -o )') + sys.exit(2) + + for opt, arg in opts: + if opt == '-h': + print( + 'executenotebook.py -s -t -c -l -w -o ') + sys.exit() + elif opt in ('-s', '--workspace'): + workspace = arg + elif opt in ('-t', '--token'): + token = arg + elif opt in ('-c', '--clusterid'): + clusterid = arg + elif opt in ('-l', '--localpath'): + localpath = arg + elif opt in ('-w', '--workspacepath'): + workspacepath = arg + elif opt in ('-o', '--outfilepath'): + outfilepath = arg + + print('-s is ' + workspace) + print('-t is ' + token) + print('-c is ' + clusterid) + print('-l is ' + localpath) + print('-w is ' + workspacepath) + print('-o is ' + outfilepath) + # Generate array from walking local path + + notebooks = [] + for path, subdirs, files in os.walk(localpath): + for name in files: + fullpath = path + '/' + name + # removes localpath to repo but keeps workspace path + fullworkspacepath = workspacepath + path.replace(localpath, '') + + name, file_extension = os.path.splitext(fullpath) + if file_extension.lower() in ['.ipynb']: + row = [fullpath, fullworkspacepath, 1] + notebooks.append(row) + + # run each element in list + for notebook in notebooks: + nameonly = os.path.basename(notebook[0]) + workspacepath = notebook[1] + + name, file_extension = os.path.splitext(nameonly) + + # workpath removes extension + fullworkspacepath = workspacepath + '/' + name + + print('Running job for:' + fullworkspacepath) + values = {'run_name': name, 'existing_cluster_id': clusterid, 'timeout_seconds': 3600, 'notebook_task': {'notebook_path': fullworkspacepath}} + + resp = requests.post(workspace + '/api/2.0/jobs/runs/submit', + data=json.dumps(values), auth=("token", token)) + runjson = resp.text + print("runjson:" + runjson) + d = json.loads(runjson) + runid = d['run_id'] + + i=0 + waiting = True + while waiting: + time.sleep(10) + jobresp = requests.get(workspace + '/api/2.0/jobs/runs/get?run_id='+str(runid), + data=json.dumps(values), auth=("token", token)) + jobjson = jobresp.text + print("jobjson:" + jobjson) + j = json.loads(jobjson) + current_state = j['state']['life_cycle_state'] + runid = j['run_id'] + if current_state in ['TERMINATED', 'INTERNAL_ERROR', 'SKIPPED'] or i >= 12: + break + i=i+1 + + if outfilepath != '': + file = open(outfilepath + '/' + str(runid) + '.json', 'w') + file.write(json.dumps(j)) + file.close() + +if __name__ == '__main__': + main() \ No newline at end of file From b3bc04ced98090a59c50aef1604e2c930831c8aa Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 26 May 2022 17:36:15 +0300 Subject: [PATCH 029/113] Updated Jenkinsfile --- Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 4b2349c..dff66d2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -48,7 +48,8 @@ pipeline { sh '''sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py || true ''' - } + } + } stage('Report Test Results') { sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\; touch ${TESTRESULTPATH}/TEST-*.xml From 4cf132101bdce60cc011325bff2369d79ea161e4 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 26 May 2022 17:38:29 +0300 Subject: [PATCH 030/113] Updated Jenkinsfile --- Jenkinsfile | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index dff66d2..3cb20f6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -37,24 +37,31 @@ pipeline { } } stage('Run Notebook Tests') { - withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh '''python3 ${SCRIPTPATH}/executenotebook.py --workspace=${DBURL}\ - --token=$TOKEN\ - --clusterid=${CLUSTERID}\ - --localpath=${NOTEBOOKPATH}\ - --workspacepath=${WORKSPACEPATH}\ - --outfilepath=${OUTFILEPATH} - ''' - sh '''sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py - python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py || true - ''' - } + steps { + script { + withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { + sh '''python3 ${SCRIPTPATH}/executenotebook.py --workspace=${DBURL}\ + --token=$TOKEN\ + --clusterid=${CLUSTERID}\ + --localpath=${NOTEBOOKPATH}\ + --workspacepath=${WORKSPACEPATH}\ + --outfilepath=${OUTFILEPATH} + ''' + sh '''sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py + python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py || true + ''' + } + } + } } stage('Report Test Results') { - sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\; - touch ${TESTRESULTPATH}/TEST-*.xml - """ - junit "**/reports/junit/*.xml" + steps { + script { + sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\; + touch ${TESTRESULTPATH}/TEST-*.xml + """ + junit "**/reports/junit/*.xml" + } } } } From 494c5c4b464498d857de79566ff253e73a17bb61 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 26 May 2022 17:45:15 +0300 Subject: [PATCH 031/113] Updated Jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 3cb20f6..4242c16 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -32,7 +32,7 @@ pipeline { stage('Copy notebooks to Databricks') { steps { script { - sh('databricks workspace import_dir -o "${NOTEBOOKPATH}" "${WORKSPACEPATH}"') + sh("""databricks workspace import_dir -o "${NOTEBOOKPATH}" "${WORKSPACEPATH}"""") } } } From 724a17a538dee11b90f9ec32493c7a8968ed663f Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 26 May 2022 17:46:46 +0300 Subject: [PATCH 032/113] Updated Jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 4242c16..fc5bde0 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -32,7 +32,7 @@ pipeline { stage('Copy notebooks to Databricks') { steps { script { - sh("""databricks workspace import_dir -o "${NOTEBOOKPATH}" "${WORKSPACEPATH}"""") + sh('''databricks workspace import_dir -o '${NOTEBOOKPATH}' '${WORKSPACEPATH}' ''') } } } From 57ac9ff938fe556ccfd4ca14c0a716d240d5ffdf Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 26 May 2022 17:47:49 +0300 Subject: [PATCH 033/113] Updated Jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index fc5bde0..bea5fc5 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -32,7 +32,7 @@ pipeline { stage('Copy notebooks to Databricks') { steps { script { - sh('''databricks workspace import_dir -o '${NOTEBOOKPATH}' '${WORKSPACEPATH}' ''') + sh("databricks workspace import_dir -o '${NOTEBOOKPATH}' '${WORKSPACEPATH}'") } } } From 219c7cc54c09c158f8063dec00310751cb870cb4 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 26 May 2022 18:39:12 +0300 Subject: [PATCH 034/113] Updated Jenkinsfile --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index bea5fc5..4ac61ad 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -40,9 +40,9 @@ pipeline { steps { script { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh '''python3 ${SCRIPTPATH}/executenotebook.py --workspace=${DBURL}\ + sh '''python3 $SCRIPTPATH/executenotebook.py --workspace=$DBURL\ --token=$TOKEN\ - --clusterid=${CLUSTERID}\ + --clusterid=$CLUSTERID\ --localpath=${NOTEBOOKPATH}\ --workspacepath=${WORKSPACEPATH}\ --outfilepath=${OUTFILEPATH} From 3c6203e24ee72a0b788ca09daea6a8c54eb7e8d1 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 26 May 2022 18:40:52 +0300 Subject: [PATCH 035/113] Updated Jenkinsfile --- Jenkinsfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 4ac61ad..cd98a7b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -40,16 +40,16 @@ pipeline { steps { script { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh '''python3 $SCRIPTPATH/executenotebook.py --workspace=$DBURL\ + sh """python3 $SCRIPTPATH/executenotebook.py --workspace=$DBURL\ --token=$TOKEN\ --clusterid=$CLUSTERID\ --localpath=${NOTEBOOKPATH}\ --workspacepath=${WORKSPACEPATH}\ --outfilepath=${OUTFILEPATH} - ''' - sh '''sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py + """ + sh """sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py || true - ''' + """ } } } From 541e194c5b4d69dd662fabf9105599ac0708901d Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 26 May 2022 18:46:20 +0300 Subject: [PATCH 036/113] Updated Jenkinsfile --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index cd98a7b..1f1ad25 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -44,8 +44,8 @@ pipeline { --token=$TOKEN\ --clusterid=$CLUSTERID\ --localpath=${NOTEBOOKPATH}\ - --workspacepath=${WORKSPACEPATH}\ - --outfilepath=${OUTFILEPATH} + --workspacepath='${WORKSPACEPATH}'\ + --outfilepath='${OUTFILEPATH}' """ sh """sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py || true From 4670a6f15739e620d597dd8325de2d5b450ba697 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 26 May 2022 19:47:40 +0300 Subject: [PATCH 037/113] Updated Jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 1f1ad25..1f4b64b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -60,7 +60,7 @@ pipeline { sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\; touch ${TESTRESULTPATH}/TEST-*.xml """ - junit "**/reports/junit/*.xml" + junit "${TESTRESULTPATH}/TEST-*.xml" } } } From f33194dd788fe8fdb83960247c05a5f7541801dd Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 26 May 2022 20:26:04 +0300 Subject: [PATCH 038/113] Updated Jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 1f4b64b..452219e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -60,7 +60,7 @@ pipeline { sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\; touch ${TESTRESULTPATH}/TEST-*.xml """ - junit "${TESTRESULTPATH}/TEST-*.xml" + junit allowEmptyResults: true, testResults:"${TESTRESULTPATH}/TEST-notebookout.xml" } } } From abdd369a4cda7c7e1f55ab71b612cbd8001f00a7 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 26 May 2022 20:42:03 +0300 Subject: [PATCH 039/113] Updated Jenkinsfile --- .ci/Dockerfile.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/Dockerfile.build b/.ci/Dockerfile.build index 7bcf6c5..5d15c6c 100644 --- a/.ci/Dockerfile.build +++ b/.ci/Dockerfile.build @@ -3,4 +3,4 @@ FROM python:3.7-alpine ENV LC_ALL=C -RUN pip install databricks-cli requests \ No newline at end of file +RUN pip install databricks-cli requests pytest \ No newline at end of file From c375813fd8e76e8b75246824ca7df5133c5fe073 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 26 May 2022 21:20:36 +0300 Subject: [PATCH 040/113] Updated Jenkinsfile --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 452219e..c401dc0 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -7,7 +7,7 @@ def SCRIPTPATH = "./.ci" def NOTEBOOKPATH = "./databricks/python" def WORKSPACEPATH = "/Shared/Spark OCR/tests" def OUTFILEPATH = "." -def TESTRESULTPATH = "." +def TESTRESULTPATH = "./reports/junit" pipeline { agent { @@ -60,7 +60,7 @@ pipeline { sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\; touch ${TESTRESULTPATH}/TEST-*.xml """ - junit allowEmptyResults: true, testResults:"${TESTRESULTPATH}/TEST-notebookout.xml" + junit "**/reports/junit/*.xml" } } } From 8a8647091696f0240fbd57e6a00882dc9d0de74c Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 27 May 2022 07:29:06 +0300 Subject: [PATCH 041/113] Updated Jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index c401dc0..2ee06b3 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -48,7 +48,7 @@ pipeline { --outfilepath='${OUTFILEPATH}' """ sh """sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py - python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py || true + python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py """ } } From 6ae741c75158ed67fc2be1567017be9c2ebcdf7a Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 27 May 2022 08:47:55 +0300 Subject: [PATCH 042/113] Updated Jenkinsfile --- .ci/evaluatenotebookruns.py | 4 +++- .ci/executenotebook.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.ci/evaluatenotebookruns.py b/.ci/evaluatenotebookruns.py index 3169bb2..fa28423 100644 --- a/.ci/evaluatenotebookruns.py +++ b/.ci/evaluatenotebookruns.py @@ -3,6 +3,7 @@ import json import glob import os +import logging class TestJobOutput(unittest.TestCase): @@ -32,12 +33,13 @@ def test_job_run(self): for filename in glob.glob(os.path.join(path, '*.json')): - print('Evaluating: ' + filename) + logging.info('Evaluating: ' + filename) data = json.load(open(filename)) status = data['state']['result_state'] statuses.append(status) self.assertFalse('FAILED' in statuses) + self.assertFalse('RUNNING' in statuses) if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/.ci/executenotebook.py b/.ci/executenotebook.py index 619a139..1585fc8 100644 --- a/.ci/executenotebook.py +++ b/.ci/executenotebook.py @@ -82,7 +82,7 @@ def main(): d = json.loads(runjson) runid = d['run_id'] - i=0 + i = 0 waiting = True while waiting: time.sleep(10) @@ -93,9 +93,9 @@ def main(): j = json.loads(jobjson) current_state = j['state']['life_cycle_state'] runid = j['run_id'] - if current_state in ['TERMINATED', 'INTERNAL_ERROR', 'SKIPPED'] or i >= 12: + if current_state in ['TERMINATED', 'INTERNAL_ERROR', 'SKIPPED'] or i >= 24: break - i=i+1 + i = i + 1 if outfilepath != '': file = open(outfilepath + '/' + str(runid) + '.json', 'w') From 13a108fab616502539483b95effb2e6f9e82cfdf Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 27 May 2022 09:18:28 +0300 Subject: [PATCH 043/113] Updated Jenkinsfile --- .ci/evaluatenotebookruns.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.ci/evaluatenotebookruns.py b/.ci/evaluatenotebookruns.py index fa28423..f1912f9 100644 --- a/.ci/evaluatenotebookruns.py +++ b/.ci/evaluatenotebookruns.py @@ -7,7 +7,7 @@ class TestJobOutput(unittest.TestCase): - test_output_path = './tests/res' + test_output_path = '#ENV#' # def test_performance(self): # path = self.test_output_path @@ -39,7 +39,6 @@ def test_job_run(self): statuses.append(status) self.assertFalse('FAILED' in statuses) - self.assertFalse('RUNNING' in statuses) if __name__ == '__main__': unittest.main() \ No newline at end of file From b81c85384a23cffc4017d576989526f83463964a Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 27 May 2022 09:36:30 +0300 Subject: [PATCH 044/113] Updated Jenkinsfile --- .ci/evaluatenotebookruns.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.ci/evaluatenotebookruns.py b/.ci/evaluatenotebookruns.py index f1912f9..ddc0d99 100644 --- a/.ci/evaluatenotebookruns.py +++ b/.ci/evaluatenotebookruns.py @@ -35,10 +35,14 @@ def test_job_run(self): for filename in glob.glob(os.path.join(path, '*.json')): logging.info('Evaluating: ' + filename) data = json.load(open(filename)) - status = data['state']['result_state'] - statuses.append(status) + if data['state']['life_cycle_state'] == "RUNNING": + statuses.append('NOT_COMPLETED') + else: + status = data['state']['result_state'] + statuses.append(status) self.assertFalse('FAILED' in statuses) + self.assertFalse('NOT_COMPLETED' in statuses) if __name__ == '__main__': unittest.main() \ No newline at end of file From 482e8c67ff0c0bd003a54ed796cf34daac64556a Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 27 May 2022 09:51:15 +0300 Subject: [PATCH 045/113] Updated Jenkinsfile --- Jenkinsfile | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 2ee06b3..305cd25 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -48,21 +48,16 @@ pipeline { --outfilepath='${OUTFILEPATH}' """ sh """sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py - python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py + python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py """ } } } } - stage('Report Test Results') { - steps { - script { - sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\; - touch ${TESTRESULTPATH}/TEST-*.xml - """ - junit "**/reports/junit/*.xml" - } - } - } } + post { + always { + sh "touch ${TESTRESULTPATH}/TEST-*.xml" + junit allowEmptyResults: true, testResults: "**/reports/junit/*.xml" + } } From bffb2fc8c665ab774a203e3d651b9bb009de02e7 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 27 May 2022 09:54:42 +0300 Subject: [PATCH 046/113] Updated Jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 305cd25..0321de3 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -57,7 +57,7 @@ pipeline { } post { always { - sh "touch ${TESTRESULTPATH}/TEST-*.xml" junit allowEmptyResults: true, testResults: "**/reports/junit/*.xml" } + } } From b94cfeb4849e3bb7c52564887397f76827998232 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 27 May 2022 14:37:07 +0300 Subject: [PATCH 047/113] Updated Jenkinsfile --- .ci/executenotebook.py | 90 +++++++++++++++++++++++------------------- Jenkinsfile | 4 +- 2 files changed, 53 insertions(+), 41 deletions(-) diff --git a/.ci/executenotebook.py b/.ci/executenotebook.py index 1585fc8..9fdf2ee 100644 --- a/.ci/executenotebook.py +++ b/.ci/executenotebook.py @@ -6,6 +6,7 @@ import sys import getopt import time +import logging def main(): @@ -15,10 +16,11 @@ def main(): localpath = '' workspacepath = '' outfilepath = '' + ignore = '' try: opts, args = getopt.getopt(sys.argv[1:], 'hs:t:c:lwo', - ['workspace=', 'token=', 'clusterid=', 'localpath=', 'workspacepath=', 'outfilepath=']) + ['workspace=', 'token=', 'clusterid=', 'localpath=', 'workspacepath=', 'outfilepath=', 'ignore=']) except getopt.GetoptError: print( 'executenotebook.py -s -t -c -l -w -o )') @@ -41,6 +43,8 @@ def main(): workspacepath = arg elif opt in ('-o', '--outfilepath'): outfilepath = arg + elif opt in ('-i', '--ignore'): + ignore = arg print('-s is ' + workspace) print('-t is ' + token) @@ -48,11 +52,17 @@ def main(): print('-l is ' + localpath) print('-w is ' + workspacepath) print('-o is ' + outfilepath) + print('-i is ' + ignore) # Generate array from walking local path + ignore = ignore.split(',') + notebooks = [] for path, subdirs, files in os.walk(localpath): for name in files: + if name in ignore: + logging.warning(f'Ignore ${name}') + continue fullpath = path + '/' + name # removes localpath to repo but keeps workspace path fullworkspacepath = workspacepath + path.replace(localpath, '') @@ -62,45 +72,45 @@ def main(): row = [fullpath, fullworkspacepath, 1] notebooks.append(row) - # run each element in list - for notebook in notebooks: - nameonly = os.path.basename(notebook[0]) - workspacepath = notebook[1] - - name, file_extension = os.path.splitext(nameonly) - - # workpath removes extension - fullworkspacepath = workspacepath + '/' + name - - print('Running job for:' + fullworkspacepath) - values = {'run_name': name, 'existing_cluster_id': clusterid, 'timeout_seconds': 3600, 'notebook_task': {'notebook_path': fullworkspacepath}} - - resp = requests.post(workspace + '/api/2.0/jobs/runs/submit', - data=json.dumps(values), auth=("token", token)) - runjson = resp.text - print("runjson:" + runjson) - d = json.loads(runjson) - runid = d['run_id'] - - i = 0 - waiting = True - while waiting: - time.sleep(10) - jobresp = requests.get(workspace + '/api/2.0/jobs/runs/get?run_id='+str(runid), - data=json.dumps(values), auth=("token", token)) - jobjson = jobresp.text - print("jobjson:" + jobjson) - j = json.loads(jobjson) - current_state = j['state']['life_cycle_state'] - runid = j['run_id'] - if current_state in ['TERMINATED', 'INTERNAL_ERROR', 'SKIPPED'] or i >= 24: - break - i = i + 1 - - if outfilepath != '': - file = open(outfilepath + '/' + str(runid) + '.json', 'w') - file.write(json.dumps(j)) - file.close() + # # run each element in list + # for notebook in notebooks: + # nameonly = os.path.basename(notebook[0]) + # workspacepath = notebook[1] + # + # name, file_extension = os.path.splitext(nameonly) + # + # # workpath removes extension + # fullworkspacepath = workspacepath + '/' + name + # + # print('Running job for:' + fullworkspacepath) + # values = {'run_name': name, 'existing_cluster_id': clusterid, 'timeout_seconds': 3600, 'notebook_task': {'notebook_path': fullworkspacepath}} + # + # resp = requests.post(workspace + '/api/2.0/jobs/runs/submit', + # data=json.dumps(values), auth=("token", token)) + # runjson = resp.text + # print("runjson:" + runjson) + # d = json.loads(runjson) + # runid = d['run_id'] + # + # i = 0 + # waiting = True + # while waiting: + # time.sleep(20) + # jobresp = requests.get(workspace + '/api/2.0/jobs/runs/get?run_id='+str(runid), + # data=json.dumps(values), auth=("token", token)) + # jobjson = jobresp.text + # print("jobjson:" + jobjson) + # j = json.loads(jobjson) + # current_state = j['state']['life_cycle_state'] + # runid = j['run_id'] + # if current_state in ['TERMINATED', 'INTERNAL_ERROR', 'SKIPPED'] or i >= 24: + # break + # i = i + 1 + # + # if outfilepath != '': + # file = open(outfilepath + '/' + str(runid) + '.json', 'w') + # file.write(json.dumps(j)) + # file.close() if __name__ == '__main__': main() \ No newline at end of file diff --git a/Jenkinsfile b/Jenkinsfile index 0321de3..09e5f08 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -8,6 +8,7 @@ def NOTEBOOKPATH = "./databricks/python" def WORKSPACEPATH = "/Shared/Spark OCR/tests" def OUTFILEPATH = "." def TESTRESULTPATH = "./reports/junit" +def IGNORE = "3. Compare CPU and GPU image processing with Spark OCR.ipynb" pipeline { agent { @@ -45,7 +46,8 @@ pipeline { --clusterid=$CLUSTERID\ --localpath=${NOTEBOOKPATH}\ --workspacepath='${WORKSPACEPATH}'\ - --outfilepath='${OUTFILEPATH}' + --outfilepath='${OUTFILEPATH}'\ + --ignore='${IGNORE}' """ sh """sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py From 003953aa34a2bb350779567834a1319176adbbc7 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 27 May 2022 14:40:26 +0300 Subject: [PATCH 048/113] Updated Jenkinsfile --- .ci/executenotebook.py | 78 +++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/.ci/executenotebook.py b/.ci/executenotebook.py index 9fdf2ee..7fc5766 100644 --- a/.ci/executenotebook.py +++ b/.ci/executenotebook.py @@ -72,45 +72,45 @@ def main(): row = [fullpath, fullworkspacepath, 1] notebooks.append(row) - # # run each element in list - # for notebook in notebooks: - # nameonly = os.path.basename(notebook[0]) - # workspacepath = notebook[1] - # - # name, file_extension = os.path.splitext(nameonly) - # - # # workpath removes extension - # fullworkspacepath = workspacepath + '/' + name - # - # print('Running job for:' + fullworkspacepath) - # values = {'run_name': name, 'existing_cluster_id': clusterid, 'timeout_seconds': 3600, 'notebook_task': {'notebook_path': fullworkspacepath}} - # - # resp = requests.post(workspace + '/api/2.0/jobs/runs/submit', - # data=json.dumps(values), auth=("token", token)) - # runjson = resp.text - # print("runjson:" + runjson) - # d = json.loads(runjson) - # runid = d['run_id'] - # - # i = 0 - # waiting = True - # while waiting: - # time.sleep(20) - # jobresp = requests.get(workspace + '/api/2.0/jobs/runs/get?run_id='+str(runid), - # data=json.dumps(values), auth=("token", token)) - # jobjson = jobresp.text - # print("jobjson:" + jobjson) - # j = json.loads(jobjson) - # current_state = j['state']['life_cycle_state'] - # runid = j['run_id'] - # if current_state in ['TERMINATED', 'INTERNAL_ERROR', 'SKIPPED'] or i >= 24: - # break - # i = i + 1 - # - # if outfilepath != '': - # file = open(outfilepath + '/' + str(runid) + '.json', 'w') - # file.write(json.dumps(j)) - # file.close() + # run each element in list + for notebook in notebooks: + nameonly = os.path.basename(notebook[0]) + workspacepath = notebook[1] + + name, file_extension = os.path.splitext(nameonly) + + # workpath removes extension + fullworkspacepath = workspacepath + '/' + name + + print('Running job for:' + fullworkspacepath) + values = {'run_name': name, 'existing_cluster_id': clusterid, 'timeout_seconds': 3600, 'notebook_task': {'notebook_path': fullworkspacepath}} + + resp = requests.post(workspace + '/api/2.0/jobs/runs/submit', + data=json.dumps(values), auth=("token", token)) + runjson = resp.text + print("runjson:" + runjson) + d = json.loads(runjson) + runid = d['run_id'] + + i = 0 + waiting = True + while waiting: + time.sleep(20) + jobresp = requests.get(workspace + '/api/2.0/jobs/runs/get?run_id='+str(runid), + data=json.dumps(values), auth=("token", token)) + jobjson = jobresp.text + print("jobjson:" + jobjson) + j = json.loads(jobjson) + current_state = j['state']['life_cycle_state'] + runid = j['run_id'] + if current_state in ['TERMINATED', 'INTERNAL_ERROR', 'SKIPPED'] or i >= 24: + break + i = i + 1 + + if outfilepath != '': + file = open(outfilepath + '/' + str(runid) + '.json', 'w') + file.write(json.dumps(j)) + file.close() if __name__ == '__main__': main() \ No newline at end of file From eb796417c03aff635637ffa9d580caf60afb792f Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 27 May 2022 15:39:40 +0300 Subject: [PATCH 049/113] Updated Jenkinsfile --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 09e5f08..30335be 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -59,6 +59,7 @@ pipeline { } post { always { + sh "find ${OUTFILEPATH} -name '*.json' -exec rm {} +" junit allowEmptyResults: true, testResults: "**/reports/junit/*.xml" } } From dae45d7498b2e5343ead33e95ff1d7c6727daf60 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Sat, 28 May 2022 08:04:19 +0300 Subject: [PATCH 050/113] Updated Jenkinsfile --- Jenkinsfile | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 30335be..67d177c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -37,6 +37,22 @@ pipeline { } } } + stage('Start cluster') { + steps { + script { + sh("databricks clusters start --cluster-id ${CLUSTERID}") + timeout(5) { + waitUntil { + script { + def respString = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true + def respJson = readJSON text: respString + return (respJson['state'] == 'RUNNING'); + } + } + } + } + } + } stage('Run Notebook Tests') { steps { script { From 5e3dacc56eed94332c2a3034a61f3818a0224ded Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Sat, 28 May 2022 08:07:56 +0300 Subject: [PATCH 051/113] Updated Jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 67d177c..394fb69 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -40,7 +40,7 @@ pipeline { stage('Start cluster') { steps { script { - sh("databricks clusters start --cluster-id ${CLUSTERID}") + sh("databricks clusters start --cluster-id ${CLUSTERID} || True") timeout(5) { waitUntil { script { From 3fd5ff54e6a9b61516f7ab2c81a21b121e9bb164 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Sat, 28 May 2022 08:09:09 +0300 Subject: [PATCH 052/113] Updated Jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 394fb69..9fed778 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -40,7 +40,7 @@ pipeline { stage('Start cluster') { steps { script { - sh("databricks clusters start --cluster-id ${CLUSTERID} || True") + sh("databricks clusters start --cluster-id ${CLUSTERID} || true") timeout(5) { waitUntil { script { From d3b2667c0c3774c6ae737f239bc4c03156d404c4 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 31 May 2022 13:37:52 +0300 Subject: [PATCH 053/113] Updated jenkinsfile --- Jenkinsfile | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 9fed778..ef6cb77 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -10,6 +10,13 @@ def OUTFILEPATH = "." def TESTRESULTPATH = "./reports/junit" def IGNORE = "3. Compare CPU and GPU image processing with Spark OCR.ipynb" +def SPARK_NLP_VERSION = "3.4.2" +def SPARK_NLP_HEALTHCARE_VERSION = "3.4.2" +def SPARK_OCR_VERSION = "3.12.0" + +def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) +def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) + pipeline { agent { dockerfile { @@ -41,7 +48,7 @@ pipeline { steps { script { sh("databricks clusters start --cluster-id ${CLUSTERID} || true") - timeout(5) { + timeout(10) { waitUntil { script { def respString = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true @@ -53,6 +60,19 @@ pipeline { } } } + stage('Install deps to Cluster') { + steps { + script { + sh("databricks libraries uninstall --cluster-id ${CLUSTERID} --all") + sh("databricks libraries install --cluster-id ${CLUSTERID} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar") + sh("databricks libraries install --cluster-id ${CLUSTERID} --jar s3://pypi.johnsnowlabs.com/${SPARK_NLP_HEALTHCARE_VERSION}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar") + sh("databricks libraries install --cluster-id ${CLUSTERID} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp_2.12:${SPARK_NLP_VERSION}") + sh("databricks libraries install --cluster-id ${CLUSTERID} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+spark30-py3-none-any.whl") + sh("databricks libraries install --cluster-id ${CLUSTERID} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl/spark_nlp_jsl-${SPARK_NLP_VERSION}-py3-none-any.whl") + sh("databricks libraries install --cluster-id ${CLUSTERID} --pypi-package spark-nlp==${SPARK_NLP_VERSION}") + } + } + } stage('Run Notebook Tests') { steps { script { From 05e8700cee458b731fce5f9bb3e750981f0c1453 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 31 May 2022 13:44:06 +0300 Subject: [PATCH 054/113] Updated jenkinsfile --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index ef6cb77..735079a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -25,6 +25,7 @@ pipeline { } environment { DATABRICKS_CONFIG_FILE = ".databricks.cfg" + GITHUB_CREDS = credentials('55e7e818-4ccf-4d23-b54c-fd97c21081ba') } stages { stage('Setup') { From 2ccd7c0788857320b6b220c4d830b940fb379f2e Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 31 May 2022 13:53:00 +0300 Subject: [PATCH 055/113] Updated jenkinsfile --- Jenkinsfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 735079a..29c5c1e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -14,9 +14,6 @@ def SPARK_NLP_VERSION = "3.4.2" def SPARK_NLP_HEALTHCARE_VERSION = "3.4.2" def SPARK_OCR_VERSION = "3.12.0" -def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) -def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) - pipeline { agent { dockerfile { @@ -64,6 +61,9 @@ pipeline { stage('Install deps to Cluster') { steps { script { + def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) + def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) + sh("databricks libraries uninstall --cluster-id ${CLUSTERID} --all") sh("databricks libraries install --cluster-id ${CLUSTERID} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar") sh("databricks libraries install --cluster-id ${CLUSTERID} --jar s3://pypi.johnsnowlabs.com/${SPARK_NLP_HEALTHCARE_VERSION}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar") From 772cbcf844b01d01cd7bc1dc04200f14e6b945ba Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 31 May 2022 13:57:47 +0300 Subject: [PATCH 056/113] Updated jenkinsfile --- .ci/Dockerfile.build | 2 +- Jenkinsfile | 32 ++++++++++++++++---------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.ci/Dockerfile.build b/.ci/Dockerfile.build index 5d15c6c..19de535 100644 --- a/.ci/Dockerfile.build +++ b/.ci/Dockerfile.build @@ -3,4 +3,4 @@ FROM python:3.7-alpine ENV LC_ALL=C -RUN pip install databricks-cli requests pytest \ No newline at end of file +RUN pip install databricks-cli requests pytest gh \ No newline at end of file diff --git a/Jenkinsfile b/Jenkinsfile index 29c5c1e..580ddd6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -42,22 +42,6 @@ pipeline { } } } - stage('Start cluster') { - steps { - script { - sh("databricks clusters start --cluster-id ${CLUSTERID} || true") - timeout(10) { - waitUntil { - script { - def respString = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true - def respJson = readJSON text: respString - return (respJson['state'] == 'RUNNING'); - } - } - } - } - } - } stage('Install deps to Cluster') { steps { script { @@ -74,6 +58,22 @@ pipeline { } } } + stage('Start cluster') { + steps { + script { + sh("databricks clusters restart --cluster-id ${CLUSTERID} || true") + timeout(10) { + waitUntil { + script { + def respString = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true + def respJson = readJSON text: respString + return (respJson['state'] == 'RUNNING'); + } + } + } + } + } + } stage('Run Notebook Tests') { steps { script { From 06e069a16cdf4972a1764641293626823c0cdc03 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 31 May 2022 14:01:03 +0300 Subject: [PATCH 057/113] Updated jenkinsfile --- .ci/Dockerfile.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/Dockerfile.build b/.ci/Dockerfile.build index 19de535..5febfa2 100644 --- a/.ci/Dockerfile.build +++ b/.ci/Dockerfile.build @@ -3,4 +3,4 @@ FROM python:3.7-alpine ENV LC_ALL=C -RUN pip install databricks-cli requests pytest gh \ No newline at end of file +RUN pip install databricks-cli requests pytest gh git \ No newline at end of file From 44f8ebafa75b5f16aa5af0823a7e5bdf5fc192c6 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 31 May 2022 14:09:43 +0300 Subject: [PATCH 058/113] Updated jenkinsfile --- .ci/Dockerfile.build | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.ci/Dockerfile.build b/.ci/Dockerfile.build index 5febfa2..283f72b 100644 --- a/.ci/Dockerfile.build +++ b/.ci/Dockerfile.build @@ -3,4 +3,6 @@ FROM python:3.7-alpine ENV LC_ALL=C -RUN pip install databricks-cli requests pytest gh git \ No newline at end of file +RUN apt-get -y update && apt-get -y install git + +RUN pip install databricks-cli requests pytest gh \ No newline at end of file From 7e03afd5f60935923293b24c153269445c63ea6a Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 31 May 2022 14:14:01 +0300 Subject: [PATCH 059/113] Updated jenkinsfile --- .ci/Dockerfile.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/Dockerfile.build b/.ci/Dockerfile.build index 283f72b..5d26695 100644 --- a/.ci/Dockerfile.build +++ b/.ci/Dockerfile.build @@ -3,6 +3,6 @@ FROM python:3.7-alpine ENV LC_ALL=C -RUN apt-get -y update && apt-get -y install git +RUN apk add --no-cache git RUN pip install databricks-cli requests pytest gh \ No newline at end of file From bc97e15684f7bd827027d8b0c0ce66ec30c1f0da Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 31 May 2022 14:21:48 +0300 Subject: [PATCH 060/113] Updated jenkinsfile --- .ci/Dockerfile.build | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.ci/Dockerfile.build b/.ci/Dockerfile.build index 5d26695..5d15c6c 100644 --- a/.ci/Dockerfile.build +++ b/.ci/Dockerfile.build @@ -3,6 +3,4 @@ FROM python:3.7-alpine ENV LC_ALL=C -RUN apk add --no-cache git - -RUN pip install databricks-cli requests pytest gh \ No newline at end of file +RUN pip install databricks-cli requests pytest \ No newline at end of file From da861ef0be74558907e3ea08d16b39297ee7b273 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 31 May 2022 14:26:34 +0300 Subject: [PATCH 061/113] Updated jenkinsfile --- Jenkinsfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 580ddd6..cc48653 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -14,6 +14,10 @@ def SPARK_NLP_VERSION = "3.4.2" def SPARK_NLP_HEALTHCARE_VERSION = "3.4.2" def SPARK_OCR_VERSION = "3.12.0" +def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) +def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) + + pipeline { agent { dockerfile { @@ -45,9 +49,6 @@ pipeline { stage('Install deps to Cluster') { steps { script { - def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) - def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) - sh("databricks libraries uninstall --cluster-id ${CLUSTERID} --all") sh("databricks libraries install --cluster-id ${CLUSTERID} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar") sh("databricks libraries install --cluster-id ${CLUSTERID} --jar s3://pypi.johnsnowlabs.com/${SPARK_NLP_HEALTHCARE_VERSION}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar") From 0b213a3da5c59c3449091d292c994bdaf7cea0f8 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 31 May 2022 14:30:12 +0300 Subject: [PATCH 062/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index cc48653..2b55ddf 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -51,7 +51,7 @@ pipeline { script { sh("databricks libraries uninstall --cluster-id ${CLUSTERID} --all") sh("databricks libraries install --cluster-id ${CLUSTERID} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar") - sh("databricks libraries install --cluster-id ${CLUSTERID} --jar s3://pypi.johnsnowlabs.com/${SPARK_NLP_HEALTHCARE_VERSION}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar") + sh("databricks libraries install --cluster-id ${CLUSTERID} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar") sh("databricks libraries install --cluster-id ${CLUSTERID} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp_2.12:${SPARK_NLP_VERSION}") sh("databricks libraries install --cluster-id ${CLUSTERID} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+spark30-py3-none-any.whl") sh("databricks libraries install --cluster-id ${CLUSTERID} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl/spark_nlp_jsl-${SPARK_NLP_VERSION}-py3-none-any.whl") From c0c305941be37a648c91c43ef132c1613ab78434 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 31 May 2022 16:49:20 +0300 Subject: [PATCH 063/113] Updated jenkinsfile --- Jenkinsfile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 2b55ddf..bc9fe4e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -62,7 +62,13 @@ pipeline { stage('Start cluster') { steps { script { - sh("databricks clusters restart --cluster-id ${CLUSTERID} || true") + def respString = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true + def respJson = readJSON text: respString + if (respJson['state'] == 'RUNNING') { + sh("databricks clusters restart --cluster-id ${CLUSTERID}") + } else { + sh("databricks clusters start --cluster-id ${CLUSTERID}") + } timeout(10) { waitUntil { script { From 6d4258dabf8ebbd86c0dc25b61366dc7cb32c735 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Tue, 31 May 2022 16:51:41 +0300 Subject: [PATCH 064/113] Updated jenkinsfile --- Jenkinsfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index bc9fe4e..87e572b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -72,9 +72,9 @@ pipeline { timeout(10) { waitUntil { script { - def respString = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true - def respJson = readJSON text: respString - return (respJson['state'] == 'RUNNING'); + def respStringWait = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true + def respJsonWait = readJSON text: respStringWait + return (respJsonWait['state'] == 'RUNNING'); } } } From ea41871a8ba697f9def111b1dd54c42c110599dc Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 08:52:37 +0300 Subject: [PATCH 065/113] Updated jenkinsfile --- Jenkinsfile | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 87e572b..5cf878f 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -18,6 +18,12 @@ def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(S def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) +def runtimeRespString = sh script: "databricks clusters spark-versions", returnStdout: true +def runtimeRespJson = readJSON text: runtimeRespString + +def runtimes = runtimeRespJson['versions'].collect { it['key'] }.join('\n') + + pipeline { agent { dockerfile { @@ -28,6 +34,13 @@ pipeline { DATABRICKS_CONFIG_FILE = ".databricks.cfg" GITHUB_CREDS = credentials('55e7e818-4ccf-4d23-b54c-fd97c21081ba') } + parameters { + choice( + name:'databricks_runtime', + choices:runtimes, + description:'define spark version' + ) + } stages { stage('Setup') { steps { From cd957ffb284c329085a23956c1a2645719c10ef7 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 08:53:59 +0300 Subject: [PATCH 066/113] Updated jenkinsfile --- Jenkinsfile | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 5cf878f..3a06755 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -17,12 +17,11 @@ def SPARK_OCR_VERSION = "3.12.0" def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) - -def runtimeRespString = sh script: "databricks clusters spark-versions", returnStdout: true -def runtimeRespJson = readJSON text: runtimeRespString - -def runtimes = runtimeRespJson['versions'].collect { it['key'] }.join('\n') - +node { + def runtimeRespString = sh script: "databricks clusters spark-versions", returnStdout: true + def runtimeRespJson = readJSON text: runtimeRespString + def runtimes = runtimeRespJson['versions'].collect { it['key'] }.join('\n') +} pipeline { agent { From 21c5815c8be5a7aa18395ce613934b91f4f73baa Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 08:56:36 +0300 Subject: [PATCH 067/113] Updated jenkinsfile --- Jenkinsfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 3a06755..0c2a43c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -17,7 +17,11 @@ def SPARK_OCR_VERSION = "3.12.0" def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) -node { +dockerNode { + withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { + sh('echo "${TOKEN}" > secret.txt') + sh("databricks configure --token-file secret.txt --host ${DBURL}") + } def runtimeRespString = sh script: "databricks clusters spark-versions", returnStdout: true def runtimeRespJson = readJSON text: runtimeRespString def runtimes = runtimeRespJson['versions'].collect { it['key'] }.join('\n') From c53b92ba312244fc57f975ee2da5b1815ca992a8 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 09:26:05 +0300 Subject: [PATCH 068/113] Updated jenkinsfile --- Jenkinsfile | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 0c2a43c..0bb8cce 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -17,15 +17,6 @@ def SPARK_OCR_VERSION = "3.12.0" def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) -dockerNode { - withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - sh('echo "${TOKEN}" > secret.txt') - sh("databricks configure --token-file secret.txt --host ${DBURL}") - } - def runtimeRespString = sh script: "databricks clusters spark-versions", returnStdout: true - def runtimeRespJson = readJSON text: runtimeRespString - def runtimes = runtimeRespJson['versions'].collect { it['key'] }.join('\n') -} pipeline { agent { @@ -40,7 +31,7 @@ pipeline { parameters { choice( name:'databricks_runtime', - choices:runtimes, + choices:'6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n7.3.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12', description:'define spark version' ) } From 4d62a3fc0f440c0079f06b7b9500fd0efd29bebb Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 14:29:20 +0300 Subject: [PATCH 069/113] Updated jenkinsfile --- Jenkinsfile | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 0bb8cce..85eea66 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,7 +2,7 @@ def DBTOKEN = "DATABRICKS_TOKEN" def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com" -def CLUSTERID = "0428-112519-vaxgi8gx" +//def CLUSTERID = "0428-112519-vaxgi8gx" def SCRIPTPATH = "./.ci" def NOTEBOOKPATH = "./databricks/python" def WORKSPACEPATH = "/Shared/Spark OCR/tests" @@ -33,6 +33,7 @@ pipeline { name:'databricks_runtime', choices:'6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n7.3.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12', description:'define spark version' + defaultValue: '7.3.x-scala2.12' ) } stages { @@ -53,6 +54,45 @@ pipeline { } } } + stage('Create Cluster') { + steps { + script { + withCredentials([string(credentialsId:'TEST_SPARK_OCR_LICENSE',variable:'SPARK_OCR_LICENSE'),[ + $class: 'AmazonWebServicesCredentialsBinding', + credentialsId: 'a4362e3b-808e-45e0-b7d2-1c62b0572df4', + accessKeyVariable: 'AWS_ACCESS_KEY_ID', + secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) { + def jsonCluster = ''' + { + "num_workers": 1, + "cluster_name": "Spark Ocr Notebook Test", + "spark_version": "${databricks_runtime}", + "spark_conf": { + "spark.sql.legacy.allowUntypedScalaUDF": "true" + }, + "aws_attributes": { + "first_on_demand": 1, + "availability": "SPOT_WITH_FALLBACK", + "zone_id": "us-west-2a", + "spot_bid_price_percent": 100, + "ebs_volume_count": 0 + }, + "node_type_id": "i3.xlarge", + "driver_node_type_id": "i3.xlarge", + "spark_env_vars": { + "JSL_OCR_LICENSE": "${SPARK_OCR_LICENSE}", + "AWS_ACCESS_KEY_ID": "${AWS_ACCESS_KEY_ID}", + "AWS_SECRET_ACCESS_KEY": "${AWS_SECRET_ACCESS_KEY}" + }, + "autotermination_minutes": 20, + } + ''' + def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}") + def CLUSTERID = (readJSON text: clusterRespString)['cluster_id'] + } + } + } + } stage('Install deps to Cluster') { steps { script { From b1b376b55fb51c25a43f233225de818872f3422d Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 14:33:16 +0300 Subject: [PATCH 070/113] Updated jenkinsfile --- Jenkinsfile | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 85eea66..84dd1b6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -32,7 +32,7 @@ pipeline { choice( name:'databricks_runtime', choices:'6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n7.3.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12', - description:'define spark version' + description:'define spark version', defaultValue: '7.3.x-scala2.12' ) } @@ -64,27 +64,27 @@ pipeline { secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) { def jsonCluster = ''' { - "num_workers": 1, - "cluster_name": "Spark Ocr Notebook Test", - "spark_version": "${databricks_runtime}", - "spark_conf": { - "spark.sql.legacy.allowUntypedScalaUDF": "true" - }, - "aws_attributes": { - "first_on_demand": 1, - "availability": "SPOT_WITH_FALLBACK", - "zone_id": "us-west-2a", - "spot_bid_price_percent": 100, - "ebs_volume_count": 0 - }, - "node_type_id": "i3.xlarge", - "driver_node_type_id": "i3.xlarge", - "spark_env_vars": { - "JSL_OCR_LICENSE": "${SPARK_OCR_LICENSE}", - "AWS_ACCESS_KEY_ID": "${AWS_ACCESS_KEY_ID}", - "AWS_SECRET_ACCESS_KEY": "${AWS_SECRET_ACCESS_KEY}" - }, - "autotermination_minutes": 20, + "num_workers": 1, + "cluster_name": "Spark Ocr Notebook Test", + "spark_version": "${databricks_runtime}", + "spark_conf": { + "spark.sql.legacy.allowUntypedScalaUDF": "true" + }, + "aws_attributes": { + "first_on_demand": 1, + "availability": "SPOT_WITH_FALLBACK", + "zone_id": "us-west-2a", + "spot_bid_price_percent": 100, + "ebs_volume_count": 0 + }, + "node_type_id": "i3.xlarge", + "driver_node_type_id": "i3.xlarge", + "spark_env_vars": { + "JSL_OCR_LICENSE": "${SPARK_OCR_LICENSE}", + "AWS_ACCESS_KEY_ID": "${AWS_ACCESS_KEY_ID}", + "AWS_SECRET_ACCESS_KEY": "${AWS_SECRET_ACCESS_KEY}" + }, + "autotermination_minutes": 20, } ''' def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}") From dcc1719048c687e44b24b8e7e65fd238b468cf6e Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 14:34:53 +0300 Subject: [PATCH 071/113] Updated jenkinsfile --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 84dd1b6..0cbd2b6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -89,6 +89,7 @@ pipeline { ''' def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}") def CLUSTERID = (readJSON text: clusterRespString)['cluster_id'] + } } } } From b5342478ebb0b9b6c6c8b376c3e4bd1a88ff92fc Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 14:36:10 +0300 Subject: [PATCH 072/113] Updated jenkinsfile --- Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 0cbd2b6..529620b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -88,7 +88,8 @@ pipeline { } ''' def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}") - def CLUSTERID = (readJSON text: clusterRespString)['cluster_id'] + def clusterRespJson = readJSON text: clusterRespString + def CLUSTERID = clusterRespJson['cluster_id'] } } } From d88234eeff56288211f2f543c9b3bf10b98ef5bf Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 14:38:41 +0300 Subject: [PATCH 073/113] Updated jenkinsfile --- Jenkinsfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 529620b..b6a3a33 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -90,7 +90,6 @@ pipeline { def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}") def clusterRespJson = readJSON text: clusterRespString def CLUSTERID = clusterRespJson['cluster_id'] - } } } } From f4db299ab7175c9054be760c2587da3a08ca7875 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 14:49:27 +0300 Subject: [PATCH 074/113] Updated jenkinsfile --- Jenkinsfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index b6a3a33..92aee7b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -17,6 +17,8 @@ def SPARK_OCR_VERSION = "3.12.0" def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) +def databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime + pipeline { agent { From 8f52b6ed075697a20eb11b99a1c98c280156b885 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 14:50:03 +0300 Subject: [PATCH 075/113] Updated jenkinsfile --- Jenkinsfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 92aee7b..27d1232 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -34,8 +34,7 @@ pipeline { choice( name:'databricks_runtime', choices:'6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n7.3.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12', - description:'define spark version', - defaultValue: '7.3.x-scala2.12' + description:'define spark version' ) } stages { From 206628909ccc87f1929cd92f1bde30d455e3a7d6 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 14:54:15 +0300 Subject: [PATCH 076/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 27d1232..5fa51b0 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -58,7 +58,7 @@ pipeline { stage('Create Cluster') { steps { script { - withCredentials([string(credentialsId:'TEST_SPARK_OCR_LICENSE',variable:'SPARK_OCR_LICENSE'),[ + withCredentials([string(credentialsId:'TEST_SPARK_NLP_LICENSE',variable:'SPARK_OCR_LICENSE'),[ $class: 'AmazonWebServicesCredentialsBinding', credentialsId: 'a4362e3b-808e-45e0-b7d2-1c62b0572df4', accessKeyVariable: 'AWS_ACCESS_KEY_ID', From 1900ad2eaff9b1f8e991278f5e10a9172d961ea8 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 15:04:52 +0300 Subject: [PATCH 077/113] Updated jenkinsfile --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 5fa51b0..89f8ecb 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -88,6 +88,7 @@ pipeline { "autotermination_minutes": 20, } ''' + echo "${jsonCluster}" def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}") def clusterRespJson = readJSON text: clusterRespString def CLUSTERID = clusterRespJson['cluster_id'] From f99d57e73034611ac48a3d075d8658647d9c133c Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 15:15:04 +0300 Subject: [PATCH 078/113] Updated jenkinsfile --- Jenkinsfile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 89f8ecb..5edc5d3 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,5 +1,7 @@ @Library('jenkinslib')_ +databricks_runtime = "" + def DBTOKEN = "DATABRICKS_TOKEN" def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com" //def CLUSTERID = "0428-112519-vaxgi8gx" @@ -17,7 +19,7 @@ def SPARK_OCR_VERSION = "3.12.0" def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) -def databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime +databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime pipeline { @@ -63,7 +65,7 @@ pipeline { credentialsId: 'a4362e3b-808e-45e0-b7d2-1c62b0572df4', accessKeyVariable: 'AWS_ACCESS_KEY_ID', secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) { - def jsonCluster = ''' + def jsonCluster = """ { "num_workers": 1, "cluster_name": "Spark Ocr Notebook Test", @@ -87,7 +89,7 @@ pipeline { }, "autotermination_minutes": 20, } - ''' + """ echo "${jsonCluster}" def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}") def clusterRespJson = readJSON text: clusterRespString From 3735bcebe8c7d764434138be529c58a59de3bf7a Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 15:40:15 +0300 Subject: [PATCH 079/113] Updated jenkinsfile --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 5edc5d3..f17d4be 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -90,8 +90,8 @@ pipeline { "autotermination_minutes": 20, } """ - echo "${jsonCluster}" - def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}") + sh('echo "${jsonCluster}" > cluster.json') + def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json-file cluster.json") def clusterRespJson = readJSON text: clusterRespString def CLUSTERID = clusterRespJson['cluster_id'] } From cb93a6e40d94b6e926c82338e5414bf43f843fb4 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 15:44:47 +0300 Subject: [PATCH 080/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index f17d4be..81df998 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -90,7 +90,7 @@ pipeline { "autotermination_minutes": 20, } """ - sh('echo "${jsonCluster}" > cluster.json') + writeFile file: 'cluster.json', text: jsonCluster def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json-file cluster.json") def clusterRespJson = readJSON text: clusterRespString def CLUSTERID = clusterRespJson['cluster_id'] From b472d9dcaf2a1e94978d4ecc8e04becc0ad45dd6 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 15:47:09 +0300 Subject: [PATCH 081/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 81df998..92ee31b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -87,7 +87,7 @@ pipeline { "AWS_ACCESS_KEY_ID": "${AWS_ACCESS_KEY_ID}", "AWS_SECRET_ACCESS_KEY": "${AWS_SECRET_ACCESS_KEY}" }, - "autotermination_minutes": 20, + "autotermination_minutes": 20 } """ writeFile file: 'cluster.json', text: jsonCluster From 776a83a22817366b70a22b8c8c42756a7df48046 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 15:49:53 +0300 Subject: [PATCH 082/113] Updated jenkinsfile --- Jenkinsfile | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 92ee31b..09c2a7d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,10 +1,11 @@ @Library('jenkinslib')_ databricks_runtime = "" +cluster_id = "" def DBTOKEN = "DATABRICKS_TOKEN" def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com" -//def CLUSTERID = "0428-112519-vaxgi8gx" +//def cluster_id = "0428-112519-vaxgi8gx" def SCRIPTPATH = "./.ci" def NOTEBOOKPATH = "./databricks/python" def WORKSPACEPATH = "/Shared/Spark OCR/tests" @@ -93,7 +94,7 @@ pipeline { writeFile file: 'cluster.json', text: jsonCluster def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json-file cluster.json") def clusterRespJson = readJSON text: clusterRespString - def CLUSTERID = clusterRespJson['cluster_id'] + cluster_id = clusterRespJson['cluster_id'] } } } @@ -101,30 +102,30 @@ pipeline { stage('Install deps to Cluster') { steps { script { - sh("databricks libraries uninstall --cluster-id ${CLUSTERID} --all") - sh("databricks libraries install --cluster-id ${CLUSTERID} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar") - sh("databricks libraries install --cluster-id ${CLUSTERID} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar") - sh("databricks libraries install --cluster-id ${CLUSTERID} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp_2.12:${SPARK_NLP_VERSION}") - sh("databricks libraries install --cluster-id ${CLUSTERID} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+spark30-py3-none-any.whl") - sh("databricks libraries install --cluster-id ${CLUSTERID} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl/spark_nlp_jsl-${SPARK_NLP_VERSION}-py3-none-any.whl") - sh("databricks libraries install --cluster-id ${CLUSTERID} --pypi-package spark-nlp==${SPARK_NLP_VERSION}") + sh("databricks libraries uninstall --cluster-id ${cluster_id} --all") + sh("databricks libraries install --cluster-id ${cluster_id} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar") + sh("databricks libraries install --cluster-id ${cluster_id} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar") + sh("databricks libraries install --cluster-id ${cluster_id} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp_2.12:${SPARK_NLP_VERSION}") + sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+spark30-py3-none-any.whl") + sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl/spark_nlp_jsl-${SPARK_NLP_VERSION}-py3-none-any.whl") + sh("databricks libraries install --cluster-id ${cluster_id} --pypi-package spark-nlp==${SPARK_NLP_VERSION}") } } } stage('Start cluster') { steps { script { - def respString = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true + def respString = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true def respJson = readJSON text: respString if (respJson['state'] == 'RUNNING') { - sh("databricks clusters restart --cluster-id ${CLUSTERID}") + sh("databricks clusters restart --cluster-id ${cluster_id}") } else { - sh("databricks clusters start --cluster-id ${CLUSTERID}") + sh("databricks clusters start --cluster-id ${cluster_id}") } timeout(10) { waitUntil { script { - def respStringWait = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true + def respStringWait = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true def respJsonWait = readJSON text: respStringWait return (respJsonWait['state'] == 'RUNNING'); } @@ -139,7 +140,7 @@ pipeline { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { sh """python3 $SCRIPTPATH/executenotebook.py --workspace=$DBURL\ --token=$TOKEN\ - --clusterid=$CLUSTERID\ + --clusterid=$cluster_id\ --localpath=${NOTEBOOKPATH}\ --workspacepath='${WORKSPACEPATH}'\ --outfilepath='${OUTFILEPATH}'\ From 2a1e5b489dfcf80548329883e516e3d253c5d06b Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 15:52:51 +0300 Subject: [PATCH 083/113] Updated jenkinsfile --- Jenkinsfile | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 09c2a7d..9183d8c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -36,7 +36,7 @@ pipeline { parameters { choice( name:'databricks_runtime', - choices:'6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n7.3.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12', + choices:'7.3.x-scala2.12\n6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12', description:'define spark version' ) } @@ -102,7 +102,7 @@ pipeline { stage('Install deps to Cluster') { steps { script { - sh("databricks libraries uninstall --cluster-id ${cluster_id} --all") + //sh("databricks libraries uninstall --cluster-id ${cluster_id} --all") sh("databricks libraries install --cluster-id ${cluster_id} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar") sh("databricks libraries install --cluster-id ${cluster_id} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar") sh("databricks libraries install --cluster-id ${cluster_id} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp_2.12:${SPARK_NLP_VERSION}") @@ -112,28 +112,28 @@ pipeline { } } } - stage('Start cluster') { - steps { - script { - def respString = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true - def respJson = readJSON text: respString - if (respJson['state'] == 'RUNNING') { - sh("databricks clusters restart --cluster-id ${cluster_id}") - } else { - sh("databricks clusters start --cluster-id ${cluster_id}") - } - timeout(10) { - waitUntil { - script { - def respStringWait = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true - def respJsonWait = readJSON text: respStringWait - return (respJsonWait['state'] == 'RUNNING'); - } - } - } - } - } - } +// stage('Start cluster') { +// steps { +// script { +// def respString = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true +// def respJson = readJSON text: respString +// if (respJson['state'] == 'RUNNING') { +// sh("databricks clusters restart --cluster-id ${cluster_id}") +// } else { +// sh("databricks clusters start --cluster-id ${cluster_id}") +// } +// timeout(10) { +// waitUntil { +// script { +// def respStringWait = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true +// def respJsonWait = readJSON text: respStringWait +// return (respJsonWait['state'] == 'RUNNING'); +// } +// } +// } +// } +// } +// } stage('Run Notebook Tests') { steps { script { From 4eac3acf6d651946bd2bc581678266c078b8bdf2 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 18:55:59 +0300 Subject: [PATCH 084/113] Updated jenkinsfile --- Jenkinsfile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 9183d8c..38e78ba 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -109,6 +109,15 @@ pipeline { sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+spark30-py3-none-any.whl") sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl/spark_nlp_jsl-${SPARK_NLP_VERSION}-py3-none-any.whl") sh("databricks libraries install --cluster-id ${cluster_id} --pypi-package spark-nlp==${SPARK_NLP_VERSION}") + timeout(10) { + waitUntil { + script { + def respStringWaitLib = sh script: "databricks libraries cluster-status --cluster-id ${cluster_id}", returnStdout: true + def respJsonWaitLib = readJSON text: respStringWaitLib + return (respJsonWaitLib['library_statuses'].every{ it['status'] == 'INSTALLED'} ); + } + } + } } } } From 576922fd0a8e14dfd5853414c2cb277508ac5a0b Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 20:19:01 +0300 Subject: [PATCH 085/113] Updated jenkinsfile --- .ci/evaluatenotebookruns.py | 2 ++ Jenkinsfile | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.ci/evaluatenotebookruns.py b/.ci/evaluatenotebookruns.py index ddc0d99..e71fe26 100644 --- a/.ci/evaluatenotebookruns.py +++ b/.ci/evaluatenotebookruns.py @@ -34,7 +34,9 @@ def test_job_run(self): for filename in glob.glob(os.path.join(path, '*.json')): logging.info('Evaluating: ' + filename) + print('Evaluating: ' + filename) data = json.load(open(filename)) + print(data) if data['state']['life_cycle_state'] == "RUNNING": statuses.append('NOT_COMPLETED') else: diff --git a/Jenkinsfile b/Jenkinsfile index 38e78ba..54b3e7d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -149,15 +149,17 @@ pipeline { withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { sh """python3 $SCRIPTPATH/executenotebook.py --workspace=$DBURL\ --token=$TOKEN\ - --clusterid=$cluster_id\ + --clusterid=${cluster_id}\ --localpath=${NOTEBOOKPATH}\ --workspacepath='${WORKSPACEPATH}'\ --outfilepath='${OUTFILEPATH}'\ --ignore='${IGNORE}' """ + sh "ls *.json" sh """sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py """ + } } } From efde4e3b8d38a788eb66b6c679d38eaceac5b0a9 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Wed, 1 Jun 2022 21:13:13 +0300 Subject: [PATCH 086/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 54b3e7d..2fc57d3 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -95,6 +95,7 @@ pipeline { def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json-file cluster.json") def clusterRespJson = readJSON text: clusterRespString cluster_id = clusterRespJson['cluster_id'] + sh "rm cluster.json" } } } @@ -155,7 +156,6 @@ pipeline { --outfilepath='${OUTFILEPATH}'\ --ignore='${IGNORE}' """ - sh "ls *.json" sh """sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py """ From 8cd879a1d652691133dbdfb341e45b4c983e38d7 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 08:57:47 +0300 Subject: [PATCH 087/113] Updated jenkinsfile --- Jenkinsfile | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 2fc57d3..6796b61 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,6 +2,7 @@ databricks_runtime = "" cluster_id = "" +ocr_versions = "" def DBTOKEN = "DATABRICKS_TOKEN" def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com" @@ -23,6 +24,10 @@ def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime +def sparkOcrVesrionsString = sh(returnStdout: true, script: 'gh api -H "Accept: application/vnd.github.v3+json" /repos/johnsnowlabs/spark-ocr/releases') +def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString +ocr_versions = sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n") + pipeline { agent { dockerfile { @@ -40,6 +45,13 @@ pipeline { description:'define spark version' ) } + parameters { + choice( + name:'ocr_version', + choices: ocr_versions, + description:'Spark Ocr Version' + ) + } stages { stage('Setup') { steps { From c5f802ab62cb2caf3050bd2e50b092ad0c96d9f6 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 08:58:42 +0300 Subject: [PATCH 088/113] Updated jenkinsfile --- Jenkinsfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 6796b61..e05cbd5 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -43,9 +43,7 @@ pipeline { name:'databricks_runtime', choices:'7.3.x-scala2.12\n6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12', description:'define spark version' - ) - } - parameters { + ), choice( name:'ocr_version', choices: ocr_versions, From ecd1dab0749ee4a8d2fa204fc63843da80ac10ec Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 08:59:53 +0300 Subject: [PATCH 089/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index e05cbd5..2f6c401 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -43,7 +43,7 @@ pipeline { name:'databricks_runtime', choices:'7.3.x-scala2.12\n6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12', description:'define spark version' - ), + ) choice( name:'ocr_version', choices: ocr_versions, From bc18891d1b976e10efcb9536d85251d993df6eb4 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 09:01:22 +0300 Subject: [PATCH 090/113] Updated jenkinsfile --- Jenkinsfile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 2f6c401..77417c2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -24,9 +24,11 @@ def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime -def sparkOcrVesrionsString = sh(returnStdout: true, script: 'gh api -H "Accept: application/vnd.github.v3+json" /repos/johnsnowlabs/spark-ocr/releases') -def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString -ocr_versions = sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n") +node { + def sparkOcrVesrionsString = sh(returnStdout: true, script: 'gh api -H "Accept: application/vnd.github.v3+json" /repos/johnsnowlabs/spark-ocr/releases') + def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString + ocr_versions = sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n") +} pipeline { agent { From 19f80e22acd3e2af3c83eca9bce61e471c1e28df Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 09:03:16 +0300 Subject: [PATCH 091/113] Updated jenkinsfile --- Jenkinsfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 77417c2..5ef03d3 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -25,9 +25,13 @@ databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : par node { + withCredentials([usernamePassword(credentialsId: '55e7e818-4ccf-4d23-b54c-fd97c21081ba', + usernameVariable: 'GITHUB_USER', + passwordVariable: 'GITHUB_TOKEN')]) { def sparkOcrVesrionsString = sh(returnStdout: true, script: 'gh api -H "Accept: application/vnd.github.v3+json" /repos/johnsnowlabs/spark-ocr/releases') def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString ocr_versions = sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n") + } } pipeline { From 4975e8d2ff71e376dac8139387d021263bf54091 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 09:19:04 +0300 Subject: [PATCH 092/113] Updated jenkinsfile --- Jenkinsfile | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 5ef03d3..faa0fce 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -3,6 +3,7 @@ databricks_runtime = "" cluster_id = "" ocr_versions = "" +nlp_versions = "" def DBTOKEN = "DATABRICKS_TOKEN" def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com" @@ -25,12 +26,19 @@ databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : par node { + + def get_releases(repo) + { + String sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""") + String sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString + return sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n") + } withCredentials([usernamePassword(credentialsId: '55e7e818-4ccf-4d23-b54c-fd97c21081ba', usernameVariable: 'GITHUB_USER', passwordVariable: 'GITHUB_TOKEN')]) { - def sparkOcrVesrionsString = sh(returnStdout: true, script: 'gh api -H "Accept: application/vnd.github.v3+json" /repos/johnsnowlabs/spark-ocr/releases') - def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString - ocr_versions = sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n") + ocr_versions = get_releases("johnsnowlabs/spark-ocr") + nlp_versions = get_releases("johnsnowlabs/spark-nlp") + } } @@ -55,6 +63,11 @@ pipeline { choices: ocr_versions, description:'Spark Ocr Version' ) + choice( + name:'nlp_version', + choices: nlp_versions, + description:'Spark Nlp Version' + ) } stages { stage('Setup') { From 117b1514bd0f603eb4025b1ab02a5a0248c6c3cd Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 09:22:09 +0300 Subject: [PATCH 093/113] Updated jenkinsfile --- Jenkinsfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index faa0fce..ba74520 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -24,15 +24,15 @@ def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime +def get_releases(repo) +{ + String sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""") + String sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString + return sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n") +} node { - def get_releases(repo) - { - String sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""") - String sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString - return sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n") - } withCredentials([usernamePassword(credentialsId: '55e7e818-4ccf-4d23-b54c-fd97c21081ba', usernameVariable: 'GITHUB_USER', passwordVariable: 'GITHUB_TOKEN')]) { From 29d1bf0851bbe1dd401a5b238e8e6b0b07251aae Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 09:24:35 +0300 Subject: [PATCH 094/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index ba74520..0660ff7 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -24,7 +24,7 @@ def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime -def get_releases(repo) +def String get_releases(repo) { String sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""") String sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString From c066e7676cb0ec8b0cc6554d5859f35c814e266a Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 09:26:06 +0300 Subject: [PATCH 095/113] Updated jenkinsfile --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 0660ff7..3e321ff 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -26,8 +26,8 @@ databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : par def String get_releases(repo) { - String sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""") - String sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString + def sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""") + def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString return sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n") } From 6c1491e6f5bcee124f4a62810e507d26cf07384b Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 09:29:16 +0300 Subject: [PATCH 096/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 3e321ff..043e9e1 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -28,7 +28,7 @@ def String get_releases(repo) { def sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""") def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString - return sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n") + return sparkOcrVesrionsStringJson.collect{ it['tag_name']}.join("\n") } node { From b31f40774ae0aace8a5e6f203b82268e1cbe297d Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 10:06:31 +0300 Subject: [PATCH 097/113] Updated jenkinsfile --- Jenkinsfile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 043e9e1..056878f 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -4,6 +4,7 @@ databricks_runtime = "" cluster_id = "" ocr_versions = "" nlp_versions = "" +nlp_healthcare_versions = "" def DBTOKEN = "DATABRICKS_TOKEN" def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com" @@ -38,8 +39,14 @@ node { passwordVariable: 'GITHUB_TOKEN')]) { ocr_versions = get_releases("johnsnowlabs/spark-ocr") nlp_versions = get_releases("johnsnowlabs/spark-nlp") + nlp_healthcare_versions = get_releases("johnsnowlabs/spark-nlp-internal") } + withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { + + def databricks_versions = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN" -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions') + echo(databricks_versions) + } } pipeline { From 6b258e528668d45e6196ee1834070a391ce0ede2 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 10:17:39 +0300 Subject: [PATCH 098/113] Updated jenkinsfile --- Jenkinsfile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 056878f..b459503 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -5,6 +5,7 @@ cluster_id = "" ocr_versions = "" nlp_versions = "" nlp_healthcare_versions = "" +databricks_versions = "" def DBTOKEN = "DATABRICKS_TOKEN" def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com" @@ -33,7 +34,6 @@ def String get_releases(repo) } node { - withCredentials([usernamePassword(credentialsId: '55e7e818-4ccf-4d23-b54c-fd97c21081ba', usernameVariable: 'GITHUB_USER', passwordVariable: 'GITHUB_TOKEN')]) { @@ -44,8 +44,9 @@ node { } withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) { - def databricks_versions = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN" -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions') - echo(databricks_versions) + def databricksVersionsString = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN" -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions') + def databricksVersionsStringJson = readJSON text: databricksVersionsString + databricks_versions = databricksVersionsStringJson['versions'].collect{ it['key']}.join("\n") } } @@ -62,8 +63,8 @@ pipeline { parameters { choice( name:'databricks_runtime', - choices:'7.3.x-scala2.12\n6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12', - description:'define spark version' + choices: databricks_versions, + description: 'Databricks runtime version' ) choice( name:'ocr_version', From c1b2fb4e30da9e7c33c6719b53f7fe227981926b Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 10:26:20 +0300 Subject: [PATCH 099/113] Updated jenkinsfile --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index b459503..df65b77 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -24,7 +24,7 @@ def SPARK_OCR_VERSION = "3.12.0" def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) -databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime +databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.split('|')[1] def String get_releases(repo) { @@ -46,7 +46,7 @@ node { def databricksVersionsString = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN" -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions') def databricksVersionsStringJson = readJSON text: databricksVersionsString - databricks_versions = databricksVersionsStringJson['versions'].collect{ it['key']}.join("\n") + databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] +"|"+it['key']}.join("\n") } } From a72afceac495c2703c75e8dfd920308a3fdab449 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 10:33:04 +0300 Subject: [PATCH 100/113] Updated jenkinsfile --- Jenkinsfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index df65b77..80c7bbb 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,6 +1,6 @@ @Library('jenkinslib')_ -databricks_runtime = "" +databricks_runtime_1 = "" cluster_id = "" ocr_versions = "" nlp_versions = "" @@ -24,7 +24,7 @@ def SPARK_OCR_VERSION = "3.12.0" def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) -databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.split('|')[1] +databricks_runtime_1 = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.split('|')[1] def String get_releases(repo) { @@ -46,7 +46,7 @@ node { def databricksVersionsString = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN" -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions') def databricksVersionsStringJson = readJSON text: databricksVersionsString - databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] +"|"+it['key']}.join("\n") + databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] + " |" + it['key']}.join("\n") } } @@ -107,7 +107,7 @@ pipeline { { "num_workers": 1, "cluster_name": "Spark Ocr Notebook Test", - "spark_version": "${databricks_runtime}", + "spark_version": "${databricks_runtime_1}", "spark_conf": { "spark.sql.legacy.allowUntypedScalaUDF": "true" }, From 3f54d87b09e856b045598f8ece4877e2a060fe0f Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 10:34:49 +0300 Subject: [PATCH 101/113] Updated jenkinsfile --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 80c7bbb..33f0aff 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -103,6 +103,7 @@ pipeline { credentialsId: 'a4362e3b-808e-45e0-b7d2-1c62b0572df4', accessKeyVariable: 'AWS_ACCESS_KEY_ID', secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) { + echo databricks_runtime_1 def jsonCluster = """ { "num_workers": 1, From 61a3114f8f2a34c19ea410ea91d06918a8ada560 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 10:37:47 +0300 Subject: [PATCH 102/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 33f0aff..af59374 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -24,7 +24,7 @@ def SPARK_OCR_VERSION = "3.12.0" def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) -databricks_runtime_1 = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.split('|')[1] +databricks_runtime_1 = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1] def String get_releases(repo) { From 348f021a9f89f12a91c83db4da42a8743ec70766 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 11:25:13 +0300 Subject: [PATCH 103/113] Updated jenkinsfile --- Jenkinsfile | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index af59374..16d30fb 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -46,7 +46,7 @@ node { def databricksVersionsString = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN" -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions') def databricksVersionsStringJson = readJSON text: databricksVersionsString - databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] + " |" + it['key']}.join("\n") + databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] + " |" + it['key']}.sort().join("\n") } } @@ -69,12 +69,17 @@ pipeline { choice( name:'ocr_version', choices: ocr_versions, - description:'Spark Ocr Version' + description:'Spark Ocr version' ) choice( name:'nlp_version', choices: nlp_versions, - description:'Spark Nlp Version' + description:'Spark Nlp version' + ) + choice( + name:'nlp_healthcare_version', + choices: nlp_healthcare_versions, + description:'Spark Nlp for Healthcare version' ) } stages { From 0136be3522b94deaf7364cf682a91b6cf1d6225f Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 17:26:16 +0300 Subject: [PATCH 104/113] Updated jenkinsfile --- Jenkinsfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 16d30fb..9c1b520 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,6 +1,6 @@ @Library('jenkinslib')_ -databricks_runtime_1 = "" +databricks_runtime = "" cluster_id = "" ocr_versions = "" nlp_versions = "" @@ -24,7 +24,7 @@ def SPARK_OCR_VERSION = "3.12.0" def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) -databricks_runtime_1 = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1] +databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1] def String get_releases(repo) { @@ -46,7 +46,7 @@ node { def databricksVersionsString = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN" -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions') def databricksVersionsStringJson = readJSON text: databricksVersionsString - databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] + " |" + it['key']}.sort().join("\n") + databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] + " |" + it['key']}.sort(false) { it.tokenize(' ')[0] as Integer }.join("\n") } } @@ -63,7 +63,7 @@ pipeline { parameters { choice( name:'databricks_runtime', - choices: databricks_versions, + choices: '7.3.x-scala2.12\n' + databricks_versions, description: 'Databricks runtime version' ) choice( @@ -108,12 +108,11 @@ pipeline { credentialsId: 'a4362e3b-808e-45e0-b7d2-1c62b0572df4', accessKeyVariable: 'AWS_ACCESS_KEY_ID', secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) { - echo databricks_runtime_1 def jsonCluster = """ { "num_workers": 1, "cluster_name": "Spark Ocr Notebook Test", - "spark_version": "${databricks_runtime_1}", + "spark_version": "${databricks_runtime}", "spark_conf": { "spark.sql.legacy.allowUntypedScalaUDF": "true" }, @@ -210,6 +209,7 @@ pipeline { } post { always { + sh "databricks clusters delete --cluster-id ${cluster_id}" sh "find ${OUTFILEPATH} -name '*.json' -exec rm {} +" junit allowEmptyResults: true, testResults: "**/reports/junit/*.xml" } From b1dd392d3046a230b03e5199d60d08448b943f05 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Thu, 2 Jun 2022 17:30:48 +0300 Subject: [PATCH 105/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 9c1b520..1c2accf 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -46,7 +46,7 @@ node { def databricksVersionsString = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN" -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions') def databricksVersionsStringJson = readJSON text: databricksVersionsString - databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] + " |" + it['key']}.sort(false) { it.tokenize(' ')[0] as Integer }.join("\n") + databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] + " |" + it['key']}.sort().join("\n") } } From 6f2e931665d10d1addcd603b761ef564f31580ba Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 3 Jun 2022 12:57:30 +0300 Subject: [PATCH 106/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 1c2accf..e1c5010 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -63,7 +63,7 @@ pipeline { parameters { choice( name:'databricks_runtime', - choices: '7.3.x-scala2.12\n' + databricks_versions, + choices: '7.3 LTS Spark 3.0.1 |7.3.x-scala2.12\n' + databricks_versions, description: 'Databricks runtime version' ) choice( From e55cca00381c2bdeba023cab6938f0724185b796 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 3 Jun 2022 13:01:01 +0300 Subject: [PATCH 107/113] Updated jenkinsfile --- Jenkinsfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index e1c5010..4770a3c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,6 +1,6 @@ @Library('jenkinslib')_ -databricks_runtime = "" +databricks_runtime_version = "" cluster_id = "" ocr_versions = "" nlp_versions = "" @@ -24,7 +24,7 @@ def SPARK_OCR_VERSION = "3.12.0" def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) -databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1] +databricks_runtime_version = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1] def String get_releases(repo) { @@ -112,7 +112,7 @@ pipeline { { "num_workers": 1, "cluster_name": "Spark Ocr Notebook Test", - "spark_version": "${databricks_runtime}", + "spark_version": "${databricks_runtime_version}", "spark_conf": { "spark.sql.legacy.allowUntypedScalaUDF": "true" }, From 4e0bb937e7c14be3b6ac16ab29684ce11ae4ebaa Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 3 Jun 2022 13:20:09 +0300 Subject: [PATCH 108/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 4770a3c..b079fb7 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -28,7 +28,7 @@ databricks_runtime_version = params.databricks_runtime == null ? '7.3.x-scala2.1 def String get_releases(repo) { - def sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""") + def sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api --paginate -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""") def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString return sparkOcrVesrionsStringJson.collect{ it['tag_name']}.join("\n") } From 7b9b0b1fafc842fe45d7e690c402cb25b02e600a Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 3 Jun 2022 14:33:59 +0300 Subject: [PATCH 109/113] Updated jenkinsfile --- Jenkinsfile | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index b079fb7..2101195 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -6,10 +6,10 @@ ocr_versions = "" nlp_versions = "" nlp_healthcare_versions = "" databricks_versions = "" +nlp_version_prefix = "" def DBTOKEN = "DATABRICKS_TOKEN" def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com" -//def cluster_id = "0428-112519-vaxgi8gx" def SCRIPTPATH = "./.ci" def NOTEBOOKPATH = "./databricks/python" def WORKSPACEPATH = "/Shared/Spark OCR/tests" @@ -17,14 +17,22 @@ def OUTFILEPATH = "." def TESTRESULTPATH = "./reports/junit" def IGNORE = "3. Compare CPU and GPU image processing with Spark OCR.ipynb" -def SPARK_NLP_VERSION = "3.4.2" -def SPARK_NLP_HEALTHCARE_VERSION = "3.4.2" -def SPARK_OCR_VERSION = "3.12.0" +databricks_runtime_version = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1] -def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) -def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) -databricks_runtime_version = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1] +switch(spark_version) { +case 'spark24': + nlp_version_prefix="-spark24" + break +case 'spark23': + nlp_version_prefix="-spark23" + break +case 'spark30': + nlp_version_prefix="" + break +case 'spark32': + nlp_version_prefix="-spark32" +} def String get_releases(repo) { @@ -71,6 +79,11 @@ pipeline { choices: ocr_versions, description:'Spark Ocr version' ) + choice( + name:'spark_version', + choices:'spark30\nspark32\nspark24\nspark23', + description:'define spark version' + ) choice( name:'nlp_version', choices: nlp_versions, @@ -145,11 +158,18 @@ pipeline { stage('Install deps to Cluster') { steps { script { + def SPARK_NLP_VERSION = params.nlp_version + def SPARK_NLP_HEALTHCARE_VERSION = params.nlp_healthcare_version + def SPARK_OCR_VERSION = params.ocr_version + + def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) + def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) + //sh("databricks libraries uninstall --cluster-id ${cluster_id} --all") - sh("databricks libraries install --cluster-id ${cluster_id} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar") - sh("databricks libraries install --cluster-id ${cluster_id} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar") - sh("databricks libraries install --cluster-id ${cluster_id} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp_2.12:${SPARK_NLP_VERSION}") - sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+spark30-py3-none-any.whl") + sh("databricks libraries install --cluster-id ${cluster_id} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-${spark_version}.jar") + sh("databricks libraries install --cluster-id ${cluster_id} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}${nlp_version_prefix}.jar") + sh("databricks libraries install --cluster-id ${cluster_id} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp${nlp_version_prefix}_2.12:${SPARK_NLP_VERSION}") + sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+${spark_version}-py3-none-any.whl") sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl/spark_nlp_jsl-${SPARK_NLP_VERSION}-py3-none-any.whl") sh("databricks libraries install --cluster-id ${cluster_id} --pypi-package spark-nlp==${SPARK_NLP_VERSION}") timeout(10) { From b68cee2036391834e5970461df99a624f676db38 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 3 Jun 2022 14:36:03 +0300 Subject: [PATCH 110/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 2101195..e0ae489 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -18,7 +18,7 @@ def TESTRESULTPATH = "./reports/junit" def IGNORE = "3. Compare CPU and GPU image processing with Spark OCR.ipynb" databricks_runtime_version = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1] - +def spark_version = params.spark_version == null ? 'spark30' : params.spark_version switch(spark_version) { case 'spark24': From ee9a3a8a17087d7c56dfce4adfa036b6c8ed4302 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 3 Jun 2022 14:41:11 +0300 Subject: [PATCH 111/113] Updated jenkinsfile --- Jenkinsfile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index e0ae489..9197980 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -17,6 +17,13 @@ def OUTFILEPATH = "." def TESTRESULTPATH = "./reports/junit" def IGNORE = "3. Compare CPU and GPU image processing with Spark OCR.ipynb" +def SPARK_NLP_VERSION = params.nlp_version +def SPARK_NLP_HEALTHCARE_VERSION = params.nlp_healthcare_version +def SPARK_OCR_VERSION = params.ocr_version + +def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) +def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) + databricks_runtime_version = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1] def spark_version = params.spark_version == null ? 'spark30' : params.spark_version @@ -158,13 +165,6 @@ pipeline { stage('Install deps to Cluster') { steps { script { - def SPARK_NLP_VERSION = params.nlp_version - def SPARK_NLP_HEALTHCARE_VERSION = params.nlp_healthcare_version - def SPARK_OCR_VERSION = params.ocr_version - - def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) - def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) - //sh("databricks libraries uninstall --cluster-id ${cluster_id} --all") sh("databricks libraries install --cluster-id ${cluster_id} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-${spark_version}.jar") sh("databricks libraries install --cluster-id ${cluster_id} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}${nlp_version_prefix}.jar") From 0a9004bfa0fc45ea334be9a39e95bd0fd3122987 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 3 Jun 2022 15:07:59 +0300 Subject: [PATCH 112/113] Updated jenkinsfile --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 9197980..24900aa 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -229,7 +229,7 @@ pipeline { } post { always { - sh "databricks clusters delete --cluster-id ${cluster_id}" + sh "databricks clusters permanent-delete --cluster-id ${cluster_id}" sh "find ${OUTFILEPATH} -name '*.json' -exec rm {} +" junit allowEmptyResults: true, testResults: "**/reports/junit/*.xml" } From 7fa5bd1d4d86a6d48ac8ab5b19da53a20cfb2075 Mon Sep 17 00:00:00 2001 From: Mykola Melnyk Date: Fri, 3 Jun 2022 15:23:07 +0300 Subject: [PATCH 113/113] Updated jenkinsfile --- Jenkinsfile | 36 ++++++------------------------------ 1 file changed, 6 insertions(+), 30 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 24900aa..2d5df80 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,6 +1,5 @@ @Library('jenkinslib')_ -databricks_runtime_version = "" cluster_id = "" ocr_versions = "" nlp_versions = "" @@ -24,10 +23,10 @@ def SPARK_OCR_VERSION = params.ocr_version def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION) def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION) -databricks_runtime_version = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1] -def spark_version = params.spark_version == null ? 'spark30' : params.spark_version +def DATABRICKS_RUNTIME_VERSION = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1] +def SPARK_VERSION = params.spark_version == null ? 'spark30' : params.spark_version -switch(spark_version) { +switch(SPARK_VERSION) { case 'spark24': nlp_version_prefix="-spark24" break @@ -132,7 +131,7 @@ pipeline { { "num_workers": 1, "cluster_name": "Spark Ocr Notebook Test", - "spark_version": "${databricks_runtime_version}", + "spark_version": "${DATABRICKS_RUNTIME_VERSION}", "spark_conf": { "spark.sql.legacy.allowUntypedScalaUDF": "true" }, @@ -165,11 +164,10 @@ pipeline { stage('Install deps to Cluster') { steps { script { - //sh("databricks libraries uninstall --cluster-id ${cluster_id} --all") - sh("databricks libraries install --cluster-id ${cluster_id} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-${spark_version}.jar") + sh("databricks libraries install --cluster-id ${cluster_id} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-${SPARK_VERSION}.jar") sh("databricks libraries install --cluster-id ${cluster_id} --jar s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}${nlp_version_prefix}.jar") sh("databricks libraries install --cluster-id ${cluster_id} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp${nlp_version_prefix}_2.12:${SPARK_NLP_VERSION}") - sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+${spark_version}-py3-none-any.whl") + sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+${SPARK_VERSION}-py3-none-any.whl") sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl/spark_nlp_jsl-${SPARK_NLP_VERSION}-py3-none-any.whl") sh("databricks libraries install --cluster-id ${cluster_id} --pypi-package spark-nlp==${SPARK_NLP_VERSION}") timeout(10) { @@ -184,28 +182,6 @@ pipeline { } } } -// stage('Start cluster') { -// steps { -// script { -// def respString = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true -// def respJson = readJSON text: respString -// if (respJson['state'] == 'RUNNING') { -// sh("databricks clusters restart --cluster-id ${cluster_id}") -// } else { -// sh("databricks clusters start --cluster-id ${cluster_id}") -// } -// timeout(10) { -// waitUntil { -// script { -// def respStringWait = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true -// def respJsonWait = readJSON text: respStringWait -// return (respJsonWait['state'] == 'RUNNING'); -// } -// } -// } -// } -// } -// } stage('Run Notebook Tests') { steps { script {