From cdb1b09eb415a5e18ccfde939b6266dfbf02f14f Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Mon, 23 May 2022 07:53:42 +0300
Subject: [PATCH 001/113] Added jenkinsfile

---
 .ci/Dockerfile.build |  6 ++++++
 Jenkinsfile          | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+)
 create mode 100644 .ci/Dockerfile.build
 create mode 100644 Jenkinsfile

diff --git a/.ci/Dockerfile.build b/.ci/Dockerfile.build
new file mode 100644
index 0000000..7bcf6c5
--- /dev/null
+++ b/.ci/Dockerfile.build
@@ -0,0 +1,6 @@
+# CI agent image for the Jenkins pipeline: Python plus the Databricks CLI and requests
+FROM python:3.7-alpine
+
+ENV LC_ALL=C
+
+RUN pip install --no-cache-dir databricks-cli requests
\ No newline at end of file
diff --git a/Jenkinsfile b/Jenkinsfile
new file mode 100644
index 0000000..0ea6850
--- /dev/null
+++ b/Jenkinsfile
@@ -0,0 +1,18 @@
+
+pipeline {
+    agent {
+        dockerfile {
+                filename '.ci/Dockerfile.build'
+        }
+    }
+
+    stages {
+        stage('Copy notebooks to Databricks') {
+            steps {
+                script {
+                    databricks  workspace import_dir -o  "./databricks/python" "/Shared/Spark OCR/tests/" --profile mykola
+                }
+            }
+        }
+    }
+}

From f063637d58e9fb066102b5782a10837c408c5432 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Mon, 23 May 2022 18:13:55 +0300
Subject: [PATCH 002/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 0ea6850..430a487 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -10,7 +10,7 @@ pipeline {
         stage('Copy notebooks to Databricks') {
             steps {
                 script {
-                    databricks  workspace import_dir -o  "./databricks/python" "/Shared/Spark OCR/tests/" --profile mykola
+                    sh('databricks  workspace import_dir -o  "./databricks/python" "/Shared/Spark OCR/tests/"')
                 }
             }
         }

From 8f0b3ee18e9dec32219658029e2c8447389b6fcf Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Mon, 23 May 2022 20:56:54 +0300
Subject: [PATCH 003/113] Updated jenkinsfile

---
 Jenkinsfile | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index 430a487..aaa5b12 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,4 +1,6 @@
 
+def DBTOKEN = "DATABRICKS_TOKEN"
+
 pipeline {
     agent {
         dockerfile {
@@ -7,6 +9,22 @@ pipeline {
     }
 
     stages {
+        stage('Setup') {
+            withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
+                sh """#!/bin/bash
+                    # Configure Databricks CLI for deployment
+                    echo "${DBURL}
+                    $TOKEN" | databricks configure --token
+
+                    # Configure Databricks Connect for testing
+                    echo "${DBURL}
+                    $TOKEN
+                    ${CLUSTERID}
+                    0
+                    15001" | databricks-connect configure
+                   """
+            }
+        }
         stage('Copy notebooks to Databricks') {
             steps {
                 script {

From 918de40578b9e6337aa0e4e554d3e72d180f13dc Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Mon, 23 May 2022 20:58:57 +0300
Subject: [PATCH 004/113] Updated jenkinsfile

---
 Jenkinsfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index aaa5b12..b6c0224 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,3 +1,4 @@
+@Library('jenkinslib')_
 
 def DBTOKEN = "DATABRICKS_TOKEN"
 

From e4dd7e0f37fdaa7f389c0b8f03cfc91d4d2e2298 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Mon, 23 May 2022 21:05:48 +0300
Subject: [PATCH 005/113] Updated jenkinsfile

---
 Jenkinsfile | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index b6c0224..466aff2 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -11,19 +11,21 @@ pipeline {
 
     stages {
         stage('Setup') {
-            withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                sh """#!/bin/bash
-                    # Configure Databricks CLI for deployment
-                    echo "${DBURL}
-                    $TOKEN" | databricks configure --token
+            script {
+                withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
+                    sh """#!/bin/bash
+                        # Configure Databricks CLI for deployment
+                        echo "${DBURL}
+                        $TOKEN" | databricks configure --token
 
-                    # Configure Databricks Connect for testing
-                    echo "${DBURL}
-                    $TOKEN
-                    ${CLUSTERID}
-                    0
-                    15001" | databricks-connect configure
-                   """
+                        # Configure Databricks Connect for testing
+                        echo "${DBURL}
+                        $TOKEN
+                        ${CLUSTERID}
+                        0
+                        15001" | databricks-connect configure
+                       """
+                }
             }
         }
         stage('Copy notebooks to Databricks') {

From 09a7171b3edba7fb65266c7d481f63beff63c723 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Mon, 23 May 2022 21:08:49 +0300
Subject: [PATCH 006/113] Updated jenkinsfile

---
 Jenkinsfile | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 466aff2..cc161bb 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -11,20 +11,22 @@ pipeline {
 
     stages {
         stage('Setup') {
-            script {
-                withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                    sh """#!/bin/bash
-                        # Configure Databricks CLI for deployment
-                        echo "${DBURL}
-                        $TOKEN" | databricks configure --token
+            steps {
+                script {
+                    withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
+                        sh """#!/bin/bash
+                            # Configure Databricks CLI for deployment
+                            echo "${DBURL}
+                            $TOKEN" | databricks configure --token
 
-                        # Configure Databricks Connect for testing
-                        echo "${DBURL}
-                        $TOKEN
-                        ${CLUSTERID}
-                        0
-                        15001" | databricks-connect configure
-                       """
+                            # Configure Databricks Connect for testing
+                            echo "${DBURL}
+                            $TOKEN
+                            ${CLUSTERID}
+                            0
+                            15001" | databricks-connect configure
+                           """
+                    }
                 }
             }
         }

From b3d93a197cf33a251a45aef13d18be0350e5ac0b Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 24 May 2022 08:05:41 +0300
Subject: [PATCH 007/113] Updated jenkinsfile

---
 Jenkinsfile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index cc161bb..3e7adae 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,6 +1,8 @@
 @Library('jenkinslib')_
 
 def DBTOKEN = "DATABRICKS_TOKEN"
+def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com/"
+def CLUSTERID = "0428-112519-vaxgi8gx"
 
 pipeline {
     agent {

From 917788cff2f66bdeb05301edcbd3bdc787caecf5 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 24 May 2022 08:13:22 +0300
Subject: [PATCH 008/113] Updated jenkinsfile

---
 Jenkinsfile | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 3e7adae..66ddbb2 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -10,6 +10,9 @@ pipeline {
                 filename '.ci/Dockerfile.build'
         }
     }
+    environment {
+            DBTOKEN_CREDS = credentials(DBTOKEN)
+        }
 
     stages {
         stage('Setup') {
@@ -19,11 +22,11 @@ pipeline {
                         sh """#!/bin/bash
                             # Configure Databricks CLI for deployment
                             echo "${DBURL}
-                            $TOKEN" | databricks configure --token
+                            $DBTOKEN_CREDS" | databricks configure --token
 
                             # Configure Databricks Connect for testing
                             echo "${DBURL}
-                            $TOKEN
+                            $DBTOKEN_CREDS
                             ${CLUSTERID}
                             0
                             15001" | databricks-connect configure

From 9998301bba2b4546a2cbe9dc492ff16783858dbb Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 24 May 2022 08:15:15 +0300
Subject: [PATCH 009/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 66ddbb2..3341d27 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -11,7 +11,7 @@ pipeline {
         }
     }
     environment {
-            DBTOKEN_CREDS = credentials(DBTOKEN)
+            DBTOKEN_CREDS = credentials("DATABRICKS_TOKEN")
         }
 
     stages {

From 3535cdb2c97532b627ba2888ee84cc83e0062d83 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 24 May 2022 08:23:31 +0300
Subject: [PATCH 010/113] Updated jenkinsfile

---
 Jenkinsfile | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 3341d27..e9fac14 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -10,16 +10,12 @@ pipeline {
                 filename '.ci/Dockerfile.build'
         }
     }
-    environment {
-            DBTOKEN_CREDS = credentials("DATABRICKS_TOKEN")
-        }
-
     stages {
         stage('Setup') {
             steps {
                 script {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                        sh """#!/bin/bash
+                        sh '''#!/bin/bash
                             # Configure Databricks CLI for deployment
                             echo "${DBURL}
                             $DBTOKEN_CREDS" | databricks configure --token
@@ -30,7 +26,7 @@ pipeline {
                             ${CLUSTERID}
                             0
                             15001" | databricks-connect configure
-                           """
+                           '''
                     }
                 }
             }

From 9953f9a603e99a6b0b7b6957e655fa886074acf0 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 24 May 2022 08:27:31 +0300
Subject: [PATCH 011/113] Updated jenkinsfile

---
 Jenkinsfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index e9fac14..cd1f448 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -15,7 +15,7 @@ pipeline {
             steps {
                 script {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                        sh '''#!/bin/bash
+                        sh(script: '''#!/bin/bash
                             # Configure Databricks CLI for deployment
                             echo "${DBURL}
                             $DBTOKEN_CREDS" | databricks configure --token
@@ -26,7 +26,7 @@ pipeline {
                             ${CLUSTERID}
                             0
                             15001" | databricks-connect configure
-                           '''
+                           ''')
                     }
                 }
             }

From 9e15fba540268125f4a136fe3f60f140ff49af58 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 24 May 2022 08:29:31 +0300
Subject: [PATCH 012/113] Updated jenkinsfile

---
 Jenkinsfile | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index cd1f448..a45dc97 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -19,13 +19,6 @@ pipeline {
                             # Configure Databricks CLI for deployment
                             echo "${DBURL}
                             $DBTOKEN_CREDS" | databricks configure --token
-
-                            # Configure Databricks Connect for testing
-                            echo "${DBURL}
-                            $DBTOKEN_CREDS
-                            ${CLUSTERID}
-                            0
-                            15001" | databricks-connect configure
                            ''')
                     }
                 }

From fd5cb03e9dbcad4e0613999606c093da3cabb39c Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 24 May 2022 08:57:05 +0300
Subject: [PATCH 013/113] Updated jenkinsfile

---
 Jenkinsfile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index a45dc97..bae4c94 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -17,8 +17,7 @@ pipeline {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
                         sh(script: '''#!/bin/bash
                             # Configure Databricks CLI for deployment
-                            echo "${DBURL}
-                            $DBTOKEN_CREDS" | databricks configure --token
+                            echo "${DBURL} $DBTOKEN_CREDS"
                            ''')
                     }
                 }

From 3c970d24c596aeccb58c941adedebad427f2b743 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 24 May 2022 19:01:49 +0300
Subject: [PATCH 014/113] Updated jenkinsfile

---
 Jenkinsfile | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index bae4c94..69564fc 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -15,9 +15,7 @@ pipeline {
             steps {
                 script {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                        sh(script: '''#!/bin/bash
-                            # Configure Databricks CLI for deployment
-                            echo "${DBURL} $DBTOKEN_CREDS"
+                        sh(script: '''echo "test"
                            ''')
                     }
                 }

From 2e101c6ff765365de97d12c8cf78b6f2291fcae5 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 24 May 2022 19:04:44 +0300
Subject: [PATCH 015/113] Updated jenkinsfile

---
 Jenkinsfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 69564fc..85ac534 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -15,7 +15,8 @@ pipeline {
             steps {
                 script {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                        sh(script: '''echo "test"
+                        sh(script: '''echo "${DBURL}
+$DBTOKEN_CREDS" | databricks configure --token
                            ''')
                     }
                 }

From 8793722648766576663472976d647a9c74420c5a Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 24 May 2022 19:09:54 +0300
Subject: [PATCH 016/113] Updated jenkinsfile

---
 Jenkinsfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 85ac534..df5bd95 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -15,7 +15,8 @@ pipeline {
             steps {
                 script {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                        sh(script: '''echo "${DBURL}
+                        sh(script: '''#!/bin/bash
+echo "${DBURL}
 $DBTOKEN_CREDS" | databricks configure --token
                            ''')
                     }

From 8a0932daea74c3fb295953db9fda31765ccb6d7c Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 24 May 2022 19:13:03 +0300
Subject: [PATCH 017/113] Updated jenkinsfile

---
 Jenkinsfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index df5bd95..177fa3f 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -15,9 +15,9 @@ pipeline {
             steps {
                 script {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                        sh(script: '''#!/bin/bash
+                        sh(script: '''
 echo "${DBURL}
-$DBTOKEN_CREDS" | databricks configure --token
+$DBTOKEN_CREDS" > databricks configure --token
                            ''')
                     }
                 }

From 792c604533d26ffd1354b28c41b0175079972f42 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 24 May 2022 19:36:08 +0300
Subject: [PATCH 018/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 177fa3f..bdd8be0 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -17,7 +17,7 @@ pipeline {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
                         sh(script: '''
 echo "${DBURL}
-$DBTOKEN_CREDS" > databricks configure --token
+$TOKEN" | databricks configure --token
                            ''')
                     }
                 }

From 971ea63c2fea7a660c278ff7f6ed9b5b22abbe90 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 24 May 2022 19:46:54 +0300
Subject: [PATCH 019/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index bdd8be0..fd3d516 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -16,7 +16,7 @@ pipeline {
                 script {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
                         sh(script: '''
-echo "${DBURL}
+echo "$DBURL
 $TOKEN" | databricks configure --token
                            ''')
                     }

From 7675238e44a0a2f62e72dc45aa76c9cfb4ca8468 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 24 May 2022 19:48:51 +0300
Subject: [PATCH 020/113] Updated jenkinsfile

---
 Jenkinsfile | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index fd3d516..42880fd 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -15,10 +15,9 @@ pipeline {
             steps {
                 script {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                        sh(script: '''
+                        sh(script: '''#!/bin/bash
 echo "$DBURL
-$TOKEN" | databricks configure --token
-                           ''')
+$TOKEN" | databricks configure --token''')
                     }
                 }
             }

From 8e407bb43ddd31f9a22b082e6404d03801a4f6d2 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 24 May 2022 19:55:22 +0300
Subject: [PATCH 021/113] Updated jenkinsfile

---
 Jenkinsfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 42880fd..b6f4f2d 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -15,9 +15,10 @@ pipeline {
             steps {
                 script {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                        sh(script: '''#!/bin/bash
+                        def output = sh(returnStdout: true, script: '''#!/bin/bash
 echo "$DBURL
 $TOKEN" | databricks configure --token''')
+                        echo $output
                     }
                 }
             }

From 6c70af86b71bef2a27f159823111d6ce6a7f6ba6 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 25 May 2022 19:38:21 +0300
Subject: [PATCH 022/113] Updated jenkinsfile

---
 Jenkinsfile | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index b6f4f2d..a8158f4 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -15,10 +15,9 @@ pipeline {
             steps {
                 script {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                        def output = sh(returnStdout: true, script: '''#!/bin/bash
-echo "$DBURL
-$TOKEN" | databricks configure --token''')
-                        echo $output
+                        sh(script: '''#!/bin/bash
+                            echo "$TOKEN" > secret.txt
+                            databricks configure --token-file secret.txt --host $DBURL''')
                     }
                 }
             }

From e9bfb609cfca9b1827a320c28cb3d40760145abf Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 25 May 2022 19:41:20 +0300
Subject: [PATCH 023/113] Updated jenkinsfile

---
 Jenkinsfile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index a8158f4..e05bf78 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -15,9 +15,9 @@ pipeline {
             steps {
                 script {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                        sh(script: '''#!/bin/bash
-                            echo "$TOKEN" > secret.txt
-                            databricks configure --token-file secret.txt --host $DBURL''')
+                        sh('''#!/bin/bash
+                            echo "${TOKEN}" > secret.txt
+                            databricks configure --token-file secret.txt --host ${DBURL}''')
                     }
                 }
             }

From 6e322a65b1f11d94327f54cc4e4fd297a3414b2e Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 25 May 2022 19:42:30 +0300
Subject: [PATCH 024/113] Updated jenkinsfile

---
 Jenkinsfile | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index e05bf78..3c2527b 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -15,9 +15,8 @@ pipeline {
             steps {
                 script {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                        sh('''#!/bin/bash
-                            echo "${TOKEN}" > secret.txt
-                            databricks configure --token-file secret.txt --host ${DBURL}''')
+                        sh('echo "${TOKEN}" > secret.txt')
+                        sh('databricks configure --token-file secret.txt --host ${DBURL}')
                     }
                 }
             }

From 0ab14dcba4f56343bf80994c5110812164451188 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 25 May 2022 19:44:40 +0300
Subject: [PATCH 025/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 3c2527b..5ebb55a 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -16,7 +16,7 @@ pipeline {
                 script {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
                         sh('echo "${TOKEN}" > secret.txt')
-                        sh('databricks configure --token-file secret.txt --host ${DBURL}')
+                        sh("databricks configure --token-file secret.txt --host ${DBURL}")
                     }
                 }
             }

From 5a91bb21f99fc68ec1c90bce25087a8b91d2f8dc Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 25 May 2022 19:54:59 +0300
Subject: [PATCH 026/113] Updated jenkinsfile

---
 Jenkinsfile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index 5ebb55a..e0258bc 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -10,6 +10,9 @@ pipeline {
                 filename '.ci/Dockerfile.build'
         }
     }
+    environment {
+        DATABRICKS_CONFIG_FILE = ".databricks.cfg"
+    }
     stages {
         stage('Setup') {
             steps {

From 09fd219af3e9740a698d42d74050d1879aa55b4c Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 26 May 2022 17:33:34 +0300
Subject: [PATCH 027/113] Updated Jenkinsfile

---
 Jenkinsfile | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index e0258bc..4b2349c 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,8 +1,13 @@
 @Library('jenkinslib')_
 
 def DBTOKEN = "DATABRICKS_TOKEN"
-def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com/"
+def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com"
 def CLUSTERID = "0428-112519-vaxgi8gx"
+def SCRIPTPATH = "./.ci"
+def NOTEBOOKPATH = "./databricks/python"
+def WORKSPACEPATH = "/Shared/Spark OCR/tests"
+def OUTFILEPATH = "."
+def TESTRESULTPATH = "."
 
 pipeline {
     agent {
@@ -27,9 +32,29 @@ pipeline {
         stage('Copy notebooks to Databricks') {
             steps {
                 script {
-                    sh('databricks  workspace import_dir -o  "./databricks/python" "/Shared/Spark OCR/tests/"')
+                    sh('databricks  workspace import_dir -o  "${NOTEBOOKPATH}" "${WORKSPACEPATH}"')
                 }
             }
         }
+        stage('Run Notebook Tests') {
+            withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
+                sh '''python3 ${SCRIPTPATH}/executenotebook.py --workspace=${DBURL}\
+                                --token=$TOKEN\
+                                --clusterid=${CLUSTERID}\
+                                --localpath=${NOTEBOOKPATH}\
+                                --workspacepath=${WORKSPACEPATH}\
+                                --outfilepath=${OUTFILEPATH}
+                   '''
+                sh '''sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py
+                          python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py || true
+                   '''
+              }
+        stage('Report Test Results') {
+                sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\;
+                      touch ${TESTRESULTPATH}/TEST-*.xml
+                   """
+                junit "**/reports/junit/*.xml"
+            }
+        }
     }
 }

From c1e440aacebab030176fb4ae67abf80c65f6fc75 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 26 May 2022 17:34:52 +0300
Subject: [PATCH 028/113] Added notebook execution and evaluation scripts

---
 .ci/evaluatenotebookruns.py |  43 +++++++++++++++
 .ci/executenotebook.py      | 106 ++++++++++++++++++++++++++++++++++++
 2 files changed, 149 insertions(+)
 create mode 100644 .ci/evaluatenotebookruns.py
 create mode 100644 .ci/executenotebook.py

diff --git a/.ci/evaluatenotebookruns.py b/.ci/evaluatenotebookruns.py
new file mode 100644
index 0000000..3169bb2
--- /dev/null
+++ b/.ci/evaluatenotebookruns.py
@@ -0,0 +1,43 @@
+# evaluatenotebookruns.py
+import unittest
+import json
+import glob
+import os
+
+class TestJobOutput(unittest.TestCase):
+    """Checks the run-result JSON files found directly under test_output_path."""
+
+    test_output_path = './tests/res'
+
+    # def test_performance(self):
+    #     path = self.test_output_path
+    #     statuses = []
+    #
+    #     for filename in glob.glob(os.path.join(path, '*.json')):
+    #         print('Evaluating: ' + filename)
+    #         data = json.load(open(filename))
+    #         duration = data['execution_duration']
+    #         if duration > 100000:
+    #             status = 'FAILED'
+    #         else:
+    #             status = 'SUCCESS'
+    #
+    #         statuses.append(status)
+    #
+    #     self.assertFalse('FAILED' in statuses)
+
+    def test_job_run(self):
+        """Fail when any result file reports state.result_state == 'FAILED'."""
+        path = self.test_output_path
+        statuses = []
+
+        for filename in glob.glob(os.path.join(path, '*.json')):
+            print('Evaluating: ' + filename)
+            with open(filename) as result_file:
+                data = json.load(result_file)
+            statuses.append(data['state']['result_state'])
+
+        self.assertNotIn('FAILED', statuses)
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/.ci/executenotebook.py b/.ci/executenotebook.py
new file mode 100644
index 0000000..619a139
--- /dev/null
+++ b/.ci/executenotebook.py
@@ -0,0 +1,106 @@
+# executenotebook.py
+#!/usr/bin/python3
+import json
+import requests
+import os
+import sys
+import getopt
+import time
+
+
+def main():
+    """Submit every .ipynb file under --localpath as a run via the Jobs runs/submit endpoint.
+
+    Each run is polled until it reaches a terminal state or polling gives up; if
+    --outfilepath is set, the last polled response is saved as <outfilepath>/<run_id>.json.
+    """
+    usage = ('executenotebook.py -s <workspace> -t <token> -c <clusterid> '
+             '-l <localpath> -w <workspacepath> -o <outfilepath>')
+    workspace = token = clusterid = localpath = workspacepath = outfilepath = ''
+
+    try:
+        # Every option except -h takes a value, so each short flag needs a trailing ':'
+        opts, args = getopt.getopt(sys.argv[1:], 'hs:t:c:l:w:o:',
+                                   ['workspace=', 'token=', 'clusterid=', 'localpath=', 'workspacepath=', 'outfilepath='])
+    except getopt.GetoptError:
+        print(usage)
+        sys.exit(2)
+
+    for opt, arg in opts:
+        if opt == '-h':
+            print(usage)
+            sys.exit()
+        elif opt in ('-s', '--workspace'):
+            workspace = arg
+        elif opt in ('-t', '--token'):
+            token = arg
+        elif opt in ('-c', '--clusterid'):
+            clusterid = arg
+        elif opt in ('-l', '--localpath'):
+            localpath = arg
+        elif opt in ('-w', '--workspacepath'):
+            workspacepath = arg
+        elif opt in ('-o', '--outfilepath'):
+            outfilepath = arg
+
+    print('-s is ' + workspace)
+    # Do not echo the access token into the build log
+    print('-t is ' + ('<redacted>' if token else ''))
+    print('-c is ' + clusterid)
+    print('-l is ' + localpath)
+    print('-w is ' + workspacepath)
+    print('-o is ' + outfilepath)
+
+    # Collect [local file, workspace folder, 1] for every notebook found under localpath
+    notebooks = []
+    for path, subdirs, files in os.walk(localpath):
+        for name in files:
+            fullpath = path + '/' + name
+            # removes localpath to repo but keeps workspace path
+            fullworkspacepath = workspacepath + path.replace(localpath, '')
+
+            if os.path.splitext(fullpath)[1].lower() in ['.ipynb']:
+                notebooks.append([fullpath, fullworkspacepath, 1])
+
+    # run each element in list
+    for notebook in notebooks:
+        nameonly = os.path.basename(notebook[0])
+        workspacepath = notebook[1]
+
+        # the workspace notebook path is the file name without its extension
+        name, file_extension = os.path.splitext(nameonly)
+        fullworkspacepath = workspacepath + '/' + name
+
+        print('Running job for:' + fullworkspacepath)
+        values = {'run_name': name, 'existing_cluster_id': clusterid, 'timeout_seconds': 3600, 'notebook_task': {'notebook_path': fullworkspacepath}}
+
+        resp = requests.post(workspace + '/api/2.0/jobs/runs/submit',
+                             data=json.dumps(values), auth=("token", token))
+        runjson = resp.text
+        print("runjson:" + runjson)
+        d = json.loads(runjson)
+        runid = d['run_id']
+
+        i = 0
+        while True:
+            time.sleep(10)
+            # run_id travels in the query string; the GET carries no request body
+            jobresp = requests.get(workspace + '/api/2.0/jobs/runs/get',
+                                   params={'run_id': runid}, auth=("token", token))
+            jobjson = jobresp.text
+            print("jobjson:" + jobjson)
+            j = json.loads(jobjson)
+            current_state = j['state']['life_cycle_state']
+            runid = j['run_id']
+            # Stop on a terminal state, or after 13 polls (about 130 s of sleeping) regardless
+            if current_state in ['TERMINATED', 'INTERNAL_ERROR', 'SKIPPED'] or i >= 12:
+                break
+            i = i + 1
+
+        if outfilepath != '':
+            # the with-block closes the file even if the write raises
+            with open(outfilepath + '/' + str(runid) + '.json', 'w') as out_file:
+                out_file.write(json.dumps(j))
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file

From b3bc04ced98090a59c50aef1604e2c930831c8aa Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 26 May 2022 17:36:15 +0300
Subject: [PATCH 029/113] Updated Jenkinsfile

---
 Jenkinsfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 4b2349c..dff66d2 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -48,7 +48,8 @@ pipeline {
                 sh '''sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py
                           python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py || true
                    '''
-              }
+             }
+        }
         stage('Report Test Results') {
                 sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\;
                       touch ${TESTRESULTPATH}/TEST-*.xml

From 4cf132101bdce60cc011325bff2369d79ea161e4 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 26 May 2022 17:38:29 +0300
Subject: [PATCH 030/113] Updated Jenkinsfile

---
 Jenkinsfile | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index dff66d2..3cb20f6 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -37,24 +37,31 @@ pipeline {
             }
         }
         stage('Run Notebook Tests') {
-            withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                sh '''python3 ${SCRIPTPATH}/executenotebook.py --workspace=${DBURL}\
-                                --token=$TOKEN\
-                                --clusterid=${CLUSTERID}\
-                                --localpath=${NOTEBOOKPATH}\
-                                --workspacepath=${WORKSPACEPATH}\
-                                --outfilepath=${OUTFILEPATH}
-                   '''
-                sh '''sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py
-                          python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py || true
-                   '''
-             }
+            steps {
+                script {
+                    withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
+                        sh '''python3 ${SCRIPTPATH}/executenotebook.py --workspace=${DBURL}\
+                                        --token=$TOKEN\
+                                        --clusterid=${CLUSTERID}\
+                                        --localpath=${NOTEBOOKPATH}\
+                                        --workspacepath=${WORKSPACEPATH}\
+                                        --outfilepath=${OUTFILEPATH}
+                           '''
+                        sh '''sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py
+                                  python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py || true
+                           '''
+                    }
+                }
+            }
         }
         stage('Report Test Results') {
-                sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\;
-                      touch ${TESTRESULTPATH}/TEST-*.xml
-                   """
-                junit "**/reports/junit/*.xml"
+            steps {
+                script {
+                    sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\;
+                          touch ${TESTRESULTPATH}/TEST-*.xml
+                       """
+                    junit "**/reports/junit/*.xml"
+                }
             }
         }
     }

From 494c5c4b464498d857de79566ff253e73a17bb61 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 26 May 2022 17:45:15 +0300
Subject: [PATCH 031/113] Updated Jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 3cb20f6..4242c16 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -32,7 +32,7 @@ pipeline {
         stage('Copy notebooks to Databricks') {
             steps {
                 script {
-                    sh('databricks  workspace import_dir -o  "${NOTEBOOKPATH}" "${WORKSPACEPATH}"')
+                    sh("""databricks  workspace import_dir -o  "${NOTEBOOKPATH}" "${WORKSPACEPATH}"""")
                 }
             }
         }

From 724a17a538dee11b90f9ec32493c7a8968ed663f Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 26 May 2022 17:46:46 +0300
Subject: [PATCH 032/113] Updated Jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 4242c16..fc5bde0 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -32,7 +32,7 @@ pipeline {
         stage('Copy notebooks to Databricks') {
             steps {
                 script {
-                    sh("""databricks  workspace import_dir -o  "${NOTEBOOKPATH}" "${WORKSPACEPATH}"""")
+                    sh('''databricks  workspace import_dir -o  '${NOTEBOOKPATH}' '${WORKSPACEPATH}' ''')
                 }
             }
         }

From 57ac9ff938fe556ccfd4ca14c0a716d240d5ffdf Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 26 May 2022 17:47:49 +0300
Subject: [PATCH 033/113] Updated Jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index fc5bde0..bea5fc5 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -32,7 +32,7 @@ pipeline {
         stage('Copy notebooks to Databricks') {
             steps {
                 script {
-                    sh('''databricks  workspace import_dir -o  '${NOTEBOOKPATH}' '${WORKSPACEPATH}' ''')
+                    sh("databricks  workspace import_dir -o '${NOTEBOOKPATH}' '${WORKSPACEPATH}'")
                 }
             }
         }

From 219c7cc54c09c158f8063dec00310751cb870cb4 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 26 May 2022 18:39:12 +0300
Subject: [PATCH 034/113] Updated Jenkinsfile

---
 Jenkinsfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index bea5fc5..4ac61ad 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -40,9 +40,9 @@ pipeline {
             steps {
                 script {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                        sh '''python3 ${SCRIPTPATH}/executenotebook.py --workspace=${DBURL}\
+                        sh '''python3 $SCRIPTPATH/executenotebook.py --workspace=$DBURL\
                                         --token=$TOKEN\
-                                        --clusterid=${CLUSTERID}\
+                                        --clusterid=$CLUSTERID\
                                         --localpath=${NOTEBOOKPATH}\
                                         --workspacepath=${WORKSPACEPATH}\
                                         --outfilepath=${OUTFILEPATH}

From 3c6203e24ee72a0b788ca09daea6a8c54eb7e8d1 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 26 May 2022 18:40:52 +0300
Subject: [PATCH 035/113] Updated Jenkinsfile

---
 Jenkinsfile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 4ac61ad..cd98a7b 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -40,16 +40,16 @@ pipeline {
             steps {
                 script {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-                        sh '''python3 $SCRIPTPATH/executenotebook.py --workspace=$DBURL\
+                        sh """python3 $SCRIPTPATH/executenotebook.py --workspace=$DBURL\
                                         --token=$TOKEN\
                                         --clusterid=$CLUSTERID\
                                         --localpath=${NOTEBOOKPATH}\
                                         --workspacepath=${WORKSPACEPATH}\
                                         --outfilepath=${OUTFILEPATH}
-                           '''
-                        sh '''sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py
+                           """
+                        sh """sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py
                                   python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py || true
-                           '''
+                           """
                     }
                 }
             }

From 541e194c5b4d69dd662fabf9105599ac0708901d Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 26 May 2022 18:46:20 +0300
Subject: [PATCH 036/113] Updated Jenkinsfile

---
 Jenkinsfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index cd98a7b..1f1ad25 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -44,8 +44,8 @@ pipeline {
                                         --token=$TOKEN\
                                         --clusterid=$CLUSTERID\
                                         --localpath=${NOTEBOOKPATH}\
-                                        --workspacepath=${WORKSPACEPATH}\
-                                        --outfilepath=${OUTFILEPATH}
+                                        --workspacepath='${WORKSPACEPATH}'\
+                                        --outfilepath='${OUTFILEPATH}'
                            """
                         sh """sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py
                                   python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py || true

From 4670a6f15739e620d597dd8325de2d5b450ba697 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 26 May 2022 19:47:40 +0300
Subject: [PATCH 037/113] Updated Jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 1f1ad25..1f4b64b 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -60,7 +60,7 @@ pipeline {
                     sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\;
                           touch ${TESTRESULTPATH}/TEST-*.xml
                        """
-                    junit "**/reports/junit/*.xml"
+                    junit "${TESTRESULTPATH}/TEST-*.xml"
                 }
             }
         }

From f33194dd788fe8fdb83960247c05a5f7541801dd Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 26 May 2022 20:26:04 +0300
Subject: [PATCH 038/113] Updated Jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 1f4b64b..452219e 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -60,7 +60,7 @@ pipeline {
                     sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\;
                           touch ${TESTRESULTPATH}/TEST-*.xml
                        """
-                    junit "${TESTRESULTPATH}/TEST-*.xml"
+                    junit allowEmptyResults: true, testResults:"${TESTRESULTPATH}/TEST-notebookout.xml"
                 }
             }
         }

From abdd369a4cda7c7e1f55ab71b612cbd8001f00a7 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 26 May 2022 20:42:03 +0300
Subject: [PATCH 039/113] Updated Jenkinsfile

---
 .ci/Dockerfile.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/Dockerfile.build b/.ci/Dockerfile.build
index 7bcf6c5..5d15c6c 100644
--- a/.ci/Dockerfile.build
+++ b/.ci/Dockerfile.build
@@ -3,4 +3,4 @@ FROM python:3.7-alpine
 
 ENV LC_ALL=C
 
-RUN pip install databricks-cli requests
\ No newline at end of file
+RUN pip install databricks-cli requests pytest
\ No newline at end of file

From c375813fd8e76e8b75246824ca7df5133c5fe073 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 26 May 2022 21:20:36 +0300
Subject: [PATCH 040/113] Updated Jenkinsfile

---
 Jenkinsfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 452219e..c401dc0 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -7,7 +7,7 @@ def SCRIPTPATH = "./.ci"
 def NOTEBOOKPATH = "./databricks/python"
 def WORKSPACEPATH = "/Shared/Spark OCR/tests"
 def OUTFILEPATH = "."
-def TESTRESULTPATH = "."
+def TESTRESULTPATH = "./reports/junit"
 
 pipeline {
     agent {
@@ -60,7 +60,7 @@ pipeline {
                     sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\;
                           touch ${TESTRESULTPATH}/TEST-*.xml
                        """
-                    junit allowEmptyResults: true, testResults:"${TESTRESULTPATH}/TEST-notebookout.xml"
+                    junit "**/reports/junit/*.xml"
                 }
             }
         }

From 8a8647091696f0240fbd57e6a00882dc9d0de74c Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 27 May 2022 07:29:06 +0300
Subject: [PATCH 041/113] Updated Jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index c401dc0..2ee06b3 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -48,7 +48,7 @@ pipeline {
                                         --outfilepath='${OUTFILEPATH}'
                            """
                         sh """sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py
-                                  python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py || true
+                                  python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py
                            """
                     }
                 }

From 6ae741c75158ed67fc2be1567017be9c2ebcdf7a Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 27 May 2022 08:47:55 +0300
Subject: [PATCH 042/113] Updated Jenkinsfile

---
 .ci/evaluatenotebookruns.py | 4 +++-
 .ci/executenotebook.py      | 6 +++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/.ci/evaluatenotebookruns.py b/.ci/evaluatenotebookruns.py
index 3169bb2..fa28423 100644
--- a/.ci/evaluatenotebookruns.py
+++ b/.ci/evaluatenotebookruns.py
@@ -3,6 +3,7 @@
 import json
 import glob
 import os
+import logging
 
 class TestJobOutput(unittest.TestCase):
 
@@ -32,12 +33,13 @@ def test_job_run(self):
 
 
         for filename in glob.glob(os.path.join(path, '*.json')):
-            print('Evaluating: ' + filename)
+            logging.info('Evaluating: ' + filename)
             data = json.load(open(filename))
             status = data['state']['result_state']
             statuses.append(status)
 
         self.assertFalse('FAILED' in statuses)
+        self.assertFalse('RUNNING' in statuses)
 
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file
diff --git a/.ci/executenotebook.py b/.ci/executenotebook.py
index 619a139..1585fc8 100644
--- a/.ci/executenotebook.py
+++ b/.ci/executenotebook.py
@@ -82,7 +82,7 @@ def main():
         d = json.loads(runjson)
         runid = d['run_id']
 
-        i=0
+        i = 0
         waiting = True
         while waiting:
             time.sleep(10)
@@ -93,9 +93,9 @@ def main():
             j = json.loads(jobjson)
             current_state = j['state']['life_cycle_state']
             runid = j['run_id']
-            if current_state in ['TERMINATED', 'INTERNAL_ERROR', 'SKIPPED'] or i >= 12:
+            if current_state in ['TERMINATED', 'INTERNAL_ERROR', 'SKIPPED'] or i >= 24:
                 break
-            i=i+1
+            i = i + 1
 
         if outfilepath != '':
             file = open(outfilepath + '/' +  str(runid) + '.json', 'w')

From 13a108fab616502539483b95effb2e6f9e82cfdf Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 27 May 2022 09:18:28 +0300
Subject: [PATCH 043/113] Updated Jenkinsfile

---
 .ci/evaluatenotebookruns.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.ci/evaluatenotebookruns.py b/.ci/evaluatenotebookruns.py
index fa28423..f1912f9 100644
--- a/.ci/evaluatenotebookruns.py
+++ b/.ci/evaluatenotebookruns.py
@@ -7,7 +7,7 @@
 
 class TestJobOutput(unittest.TestCase):
 
-    test_output_path = './tests/res'
+    test_output_path = '#ENV#'
 
     # def test_performance(self):
     #     path = self.test_output_path
@@ -39,7 +39,6 @@ def test_job_run(self):
             statuses.append(status)
 
         self.assertFalse('FAILED' in statuses)
-        self.assertFalse('RUNNING' in statuses)
 
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file

From b81c85384a23cffc4017d576989526f83463964a Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 27 May 2022 09:36:30 +0300
Subject: [PATCH 044/113] Updated Jenkinsfile

---
 .ci/evaluatenotebookruns.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.ci/evaluatenotebookruns.py b/.ci/evaluatenotebookruns.py
index f1912f9..ddc0d99 100644
--- a/.ci/evaluatenotebookruns.py
+++ b/.ci/evaluatenotebookruns.py
@@ -35,10 +35,14 @@ def test_job_run(self):
         for filename in glob.glob(os.path.join(path, '*.json')):
             logging.info('Evaluating: ' + filename)
             data = json.load(open(filename))
-            status = data['state']['result_state']
-            statuses.append(status)
+            if data['state']['life_cycle_state'] == "RUNNING":
+                statuses.append('NOT_COMPLETED')
+            else:
+                status = data['state']['result_state']
+                statuses.append(status)
 
         self.assertFalse('FAILED' in statuses)
+        self.assertFalse('NOT_COMPLETED' in statuses)
 
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file

From 482e8c67ff0c0bd003a54ed796cf34daac64556a Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 27 May 2022 09:51:15 +0300
Subject: [PATCH 045/113] Updated Jenkinsfile

---
 Jenkinsfile | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 2ee06b3..305cd25 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -48,21 +48,16 @@ pipeline {
                                         --outfilepath='${OUTFILEPATH}'
                            """
                         sh """sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py
-                                  python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py
+                              python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py
                            """
                     }
                 }
             }
         }
-        stage('Report Test Results') {
-            steps {
-                script {
-                    sh """find ${OUTFILEPATH} -name '*.json' -exec gzip --verbose {} \\;
-                          touch ${TESTRESULTPATH}/TEST-*.xml
-                       """
-                    junit "**/reports/junit/*.xml"
-                }
-            }
-        }
     }
+    post {
+        always {
+            sh "touch ${TESTRESULTPATH}/TEST-*.xml"
+            junit allowEmptyResults: true, testResults: "**/reports/junit/*.xml"
+        }
 }

From bffb2fc8c665ab774a203e3d651b9bb009de02e7 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 27 May 2022 09:54:42 +0300
Subject: [PATCH 046/113] Updated Jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 305cd25..0321de3 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -57,7 +57,7 @@ pipeline {
     }
     post {
         always {
-            sh "touch ${TESTRESULTPATH}/TEST-*.xml"
             junit allowEmptyResults: true, testResults: "**/reports/junit/*.xml"
         }
+    }
 }

From b94cfeb4849e3bb7c52564887397f76827998232 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 27 May 2022 14:37:07 +0300
Subject: [PATCH 047/113] Updated Jenkinsfile

---
 .ci/executenotebook.py | 90 +++++++++++++++++++++++-------------------
 Jenkinsfile            |  4 +-
 2 files changed, 53 insertions(+), 41 deletions(-)

diff --git a/.ci/executenotebook.py b/.ci/executenotebook.py
index 1585fc8..9fdf2ee 100644
--- a/.ci/executenotebook.py
+++ b/.ci/executenotebook.py
@@ -6,6 +6,7 @@
 import sys
 import getopt
 import time
+import logging
 
 
 def main():
@@ -15,10 +16,11 @@ def main():
     localpath = ''
     workspacepath = ''
     outfilepath = ''
+    ignore = ''
 
     try:
         opts, args = getopt.getopt(sys.argv[1:], 'hs:t:c:lwo',
-                                   ['workspace=', 'token=', 'clusterid=', 'localpath=', 'workspacepath=', 'outfilepath='])
+                                   ['workspace=', 'token=', 'clusterid=', 'localpath=', 'workspacepath=', 'outfilepath=', 'ignore='])
     except getopt.GetoptError:
         print(
             'executenotebook.py -s <workspace> -t <token>  -c <clusterid> -l <localpath> -w <workspacepath> -o <outfilepath>)')
@@ -41,6 +43,8 @@ def main():
             workspacepath = arg
         elif opt in ('-o', '--outfilepath'):
             outfilepath = arg
+        elif opt in ('-i', '--ignore'):
+            ignore = arg
 
     print('-s is ' + workspace)
     print('-t is ' + token)
@@ -48,11 +52,17 @@ def main():
     print('-l is ' + localpath)
     print('-w is ' + workspacepath)
     print('-o is ' + outfilepath)
+    print('-i is ' + ignore)
     # Generate array from walking local path
 
+    ignore = ignore.split(',')
+
     notebooks = []
     for path, subdirs, files in os.walk(localpath):
         for name in files:
+            if name in ignore:
+                logging.warning(f'Ignore ${name}')
+                continue
             fullpath = path + '/' + name
             # removes localpath to repo but keeps workspace path
             fullworkspacepath = workspacepath + path.replace(localpath, '')
@@ -62,45 +72,45 @@ def main():
                 row = [fullpath, fullworkspacepath, 1]
                 notebooks.append(row)
 
-    # run each element in list
-    for notebook in notebooks:
-        nameonly = os.path.basename(notebook[0])
-        workspacepath = notebook[1]
-
-        name, file_extension = os.path.splitext(nameonly)
-
-        # workpath removes extension
-        fullworkspacepath = workspacepath + '/' + name
-
-        print('Running job for:' + fullworkspacepath)
-        values = {'run_name': name, 'existing_cluster_id': clusterid, 'timeout_seconds': 3600, 'notebook_task': {'notebook_path': fullworkspacepath}}
-
-        resp = requests.post(workspace + '/api/2.0/jobs/runs/submit',
-                             data=json.dumps(values), auth=("token", token))
-        runjson = resp.text
-        print("runjson:" + runjson)
-        d = json.loads(runjson)
-        runid = d['run_id']
-
-        i = 0
-        waiting = True
-        while waiting:
-            time.sleep(10)
-            jobresp = requests.get(workspace + '/api/2.0/jobs/runs/get?run_id='+str(runid),
-                                   data=json.dumps(values), auth=("token", token))
-            jobjson = jobresp.text
-            print("jobjson:" + jobjson)
-            j = json.loads(jobjson)
-            current_state = j['state']['life_cycle_state']
-            runid = j['run_id']
-            if current_state in ['TERMINATED', 'INTERNAL_ERROR', 'SKIPPED'] or i >= 24:
-                break
-            i = i + 1
-
-        if outfilepath != '':
-            file = open(outfilepath + '/' +  str(runid) + '.json', 'w')
-            file.write(json.dumps(j))
-            file.close()
+    # # run each element in list
+    # for notebook in notebooks:
+    #     nameonly = os.path.basename(notebook[0])
+    #     workspacepath = notebook[1]
+    #
+    #     name, file_extension = os.path.splitext(nameonly)
+    #
+    #     # workpath removes extension
+    #     fullworkspacepath = workspacepath + '/' + name
+    #
+    #     print('Running job for:' + fullworkspacepath)
+    #     values = {'run_name': name, 'existing_cluster_id': clusterid, 'timeout_seconds': 3600, 'notebook_task': {'notebook_path': fullworkspacepath}}
+    #
+    #     resp = requests.post(workspace + '/api/2.0/jobs/runs/submit',
+    #                          data=json.dumps(values), auth=("token", token))
+    #     runjson = resp.text
+    #     print("runjson:" + runjson)
+    #     d = json.loads(runjson)
+    #     runid = d['run_id']
+    #
+    #     i = 0
+    #     waiting = True
+    #     while waiting:
+    #         time.sleep(20)
+    #         jobresp = requests.get(workspace + '/api/2.0/jobs/runs/get?run_id='+str(runid),
+    #                                data=json.dumps(values), auth=("token", token))
+    #         jobjson = jobresp.text
+    #         print("jobjson:" + jobjson)
+    #         j = json.loads(jobjson)
+    #         current_state = j['state']['life_cycle_state']
+    #         runid = j['run_id']
+    #         if current_state in ['TERMINATED', 'INTERNAL_ERROR', 'SKIPPED'] or i >= 24:
+    #             break
+    #         i = i + 1
+    #
+    #     if outfilepath != '':
+    #         file = open(outfilepath + '/' +  str(runid) + '.json', 'w')
+    #         file.write(json.dumps(j))
+    #         file.close()
 
 if __name__ == '__main__':
     main()
\ No newline at end of file
diff --git a/Jenkinsfile b/Jenkinsfile
index 0321de3..09e5f08 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -8,6 +8,7 @@ def NOTEBOOKPATH = "./databricks/python"
 def WORKSPACEPATH = "/Shared/Spark OCR/tests"
 def OUTFILEPATH = "."
 def TESTRESULTPATH = "./reports/junit"
+def IGNORE = "3. Compare CPU and GPU image processing with Spark OCR.ipynb"
 
 pipeline {
     agent {
@@ -45,7 +46,8 @@ pipeline {
                                         --clusterid=$CLUSTERID\
                                         --localpath=${NOTEBOOKPATH}\
                                         --workspacepath='${WORKSPACEPATH}'\
-                                        --outfilepath='${OUTFILEPATH}'
+                                        --outfilepath='${OUTFILEPATH}'\
+                                        --ignore='${IGNORE}'
                            """
                         sh """sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py
                               python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py

From 003953aa34a2bb350779567834a1319176adbbc7 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 27 May 2022 14:40:26 +0300
Subject: [PATCH 048/113] Updated Jenkinsfile

---
 .ci/executenotebook.py | 78 +++++++++++++++++++++---------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/.ci/executenotebook.py b/.ci/executenotebook.py
index 9fdf2ee..7fc5766 100644
--- a/.ci/executenotebook.py
+++ b/.ci/executenotebook.py
@@ -72,45 +72,45 @@ def main():
                 row = [fullpath, fullworkspacepath, 1]
                 notebooks.append(row)
 
-    # # run each element in list
-    # for notebook in notebooks:
-    #     nameonly = os.path.basename(notebook[0])
-    #     workspacepath = notebook[1]
-    #
-    #     name, file_extension = os.path.splitext(nameonly)
-    #
-    #     # workpath removes extension
-    #     fullworkspacepath = workspacepath + '/' + name
-    #
-    #     print('Running job for:' + fullworkspacepath)
-    #     values = {'run_name': name, 'existing_cluster_id': clusterid, 'timeout_seconds': 3600, 'notebook_task': {'notebook_path': fullworkspacepath}}
-    #
-    #     resp = requests.post(workspace + '/api/2.0/jobs/runs/submit',
-    #                          data=json.dumps(values), auth=("token", token))
-    #     runjson = resp.text
-    #     print("runjson:" + runjson)
-    #     d = json.loads(runjson)
-    #     runid = d['run_id']
-    #
-    #     i = 0
-    #     waiting = True
-    #     while waiting:
-    #         time.sleep(20)
-    #         jobresp = requests.get(workspace + '/api/2.0/jobs/runs/get?run_id='+str(runid),
-    #                                data=json.dumps(values), auth=("token", token))
-    #         jobjson = jobresp.text
-    #         print("jobjson:" + jobjson)
-    #         j = json.loads(jobjson)
-    #         current_state = j['state']['life_cycle_state']
-    #         runid = j['run_id']
-    #         if current_state in ['TERMINATED', 'INTERNAL_ERROR', 'SKIPPED'] or i >= 24:
-    #             break
-    #         i = i + 1
-    #
-    #     if outfilepath != '':
-    #         file = open(outfilepath + '/' +  str(runid) + '.json', 'w')
-    #         file.write(json.dumps(j))
-    #         file.close()
+    # run each element in list
+    for notebook in notebooks:
+        nameonly = os.path.basename(notebook[0])
+        workspacepath = notebook[1]
+
+        name, file_extension = os.path.splitext(nameonly)
+
+        # workpath removes extension
+        fullworkspacepath = workspacepath + '/' + name
+
+        print('Running job for:' + fullworkspacepath)
+        values = {'run_name': name, 'existing_cluster_id': clusterid, 'timeout_seconds': 3600, 'notebook_task': {'notebook_path': fullworkspacepath}}
+
+        resp = requests.post(workspace + '/api/2.0/jobs/runs/submit',
+                             data=json.dumps(values), auth=("token", token))
+        runjson = resp.text
+        print("runjson:" + runjson)
+        d = json.loads(runjson)
+        runid = d['run_id']
+
+        i = 0
+        waiting = True
+        while waiting:
+            time.sleep(20)
+            jobresp = requests.get(workspace + '/api/2.0/jobs/runs/get?run_id='+str(runid),
+                                   data=json.dumps(values), auth=("token", token))
+            jobjson = jobresp.text
+            print("jobjson:" + jobjson)
+            j = json.loads(jobjson)
+            current_state = j['state']['life_cycle_state']
+            runid = j['run_id']
+            if current_state in ['TERMINATED', 'INTERNAL_ERROR', 'SKIPPED'] or i >= 24:
+                break
+            i = i + 1
+
+        if outfilepath != '':
+            file = open(outfilepath + '/' +  str(runid) + '.json', 'w')
+            file.write(json.dumps(j))
+            file.close()
 
 if __name__ == '__main__':
     main()
\ No newline at end of file

From eb796417c03aff635637ffa9d580caf60afb792f Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 27 May 2022 15:39:40 +0300
Subject: [PATCH 049/113] Updated Jenkinsfile

---
 Jenkinsfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index 09e5f08..30335be 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -59,6 +59,7 @@ pipeline {
     }
     post {
         always {
+            sh "find ${OUTFILEPATH} -name '*.json' -exec rm {} +"
             junit allowEmptyResults: true, testResults: "**/reports/junit/*.xml"
         }
     }

From dae45d7498b2e5343ead33e95ff1d7c6727daf60 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Sat, 28 May 2022 08:04:19 +0300
Subject: [PATCH 050/113] Updated Jenkinsfile

---
 Jenkinsfile | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index 30335be..67d177c 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -37,6 +37,22 @@ pipeline {
                 }
             }
         }
+        stage('Start cluster') {
+            steps {
+                script {
+                    sh("databricks clusters start --cluster-id ${CLUSTERID}")
+                    timeout(5) {
+                        waitUntil {
+                           script {
+                             def respString = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true
+                             def respJson = readJSON text: respString
+                             return (respJson['state'] == 'RUNNING');
+                           }
+                        }
+                    }
+                }
+            }
+        }
         stage('Run Notebook Tests') {
             steps {
                 script {

From 5e3dacc56eed94332c2a3034a61f3818a0224ded Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Sat, 28 May 2022 08:07:56 +0300
Subject: [PATCH 051/113] Updated Jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 67d177c..394fb69 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -40,7 +40,7 @@ pipeline {
         stage('Start cluster') {
             steps {
                 script {
-                    sh("databricks clusters start --cluster-id ${CLUSTERID}")
+                    sh("databricks clusters start --cluster-id ${CLUSTERID} || True")
                     timeout(5) {
                         waitUntil {
                            script {

From 3fd5ff54e6a9b61516f7ab2c81a21b121e9bb164 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Sat, 28 May 2022 08:09:09 +0300
Subject: [PATCH 052/113] Updated Jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 394fb69..9fed778 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -40,7 +40,7 @@ pipeline {
         stage('Start cluster') {
             steps {
                 script {
-                    sh("databricks clusters start --cluster-id ${CLUSTERID} || True")
+                    sh("databricks clusters start --cluster-id ${CLUSTERID} || true")
                     timeout(5) {
                         waitUntil {
                            script {

From d3b2667c0c3774c6ae737f239bc4c03156d404c4 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 31 May 2022 13:37:52 +0300
Subject: [PATCH 053/113] Updated jenkinsfile

---
 Jenkinsfile | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 9fed778..ef6cb77 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -10,6 +10,13 @@ def OUTFILEPATH = "."
 def TESTRESULTPATH = "./reports/junit"
 def IGNORE = "3. Compare CPU and GPU image processing with Spark OCR.ipynb"
 
+def SPARK_NLP_VERSION = "3.4.2"
+def SPARK_NLP_HEALTHCARE_VERSION = "3.4.2"
+def SPARK_OCR_VERSION = "3.12.0"
+
+def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
+def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
+
 pipeline {
     agent {
         dockerfile {
@@ -41,7 +48,7 @@ pipeline {
             steps {
                 script {
                     sh("databricks clusters start --cluster-id ${CLUSTERID} || true")
-                    timeout(5) {
+                    timeout(10) {
                         waitUntil {
                            script {
                              def respString = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true
@@ -53,6 +60,19 @@ pipeline {
                 }
             }
         }
+        stage('Install deps to Cluster') {
+            steps {
+                script {
+                    sh("databricks libraries uninstall --cluster-id ${CLUSTERID} --all")
+                    sh("databricks libraries install --cluster-id ${CLUSTERID} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar")
+                    sh("databricks libraries install --cluster-id ${CLUSTERID} --jar  s3://pypi.johnsnowlabs.com/${SPARK_NLP_HEALTHCARE_VERSION}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar")
+                    sh("databricks libraries install --cluster-id ${CLUSTERID} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp_2.12:${SPARK_NLP_VERSION}")
+                    sh("databricks libraries install --cluster-id ${CLUSTERID} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+spark30-py3-none-any.whl")
+                    sh("databricks libraries install --cluster-id ${CLUSTERID} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl/spark_nlp_jsl-${SPARK_NLP_VERSION}-py3-none-any.whl")
+                    sh("databricks libraries install --cluster-id ${CLUSTERID} --pypi-package spark-nlp==${SPARK_NLP_VERSION}")
+                }
+            }
+        }
         stage('Run Notebook Tests') {
             steps {
                 script {

From 05e8700cee458b731fce5f9bb3e750981f0c1453 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 31 May 2022 13:44:06 +0300
Subject: [PATCH 054/113] Updated jenkinsfile

---
 Jenkinsfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index ef6cb77..735079a 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -25,6 +25,7 @@ pipeline {
     }
     environment {
         DATABRICKS_CONFIG_FILE = ".databricks.cfg"
+        GITHUB_CREDS = credentials('55e7e818-4ccf-4d23-b54c-fd97c21081ba')
     }
     stages {
         stage('Setup') {

From 2ccd7c0788857320b6b220c4d830b940fb379f2e Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 31 May 2022 13:53:00 +0300
Subject: [PATCH 055/113] Updated jenkinsfile

---
 Jenkinsfile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 735079a..29c5c1e 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -14,9 +14,6 @@ def SPARK_NLP_VERSION = "3.4.2"
 def SPARK_NLP_HEALTHCARE_VERSION = "3.4.2"
 def SPARK_OCR_VERSION = "3.12.0"
 
-def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
-def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
-
 pipeline {
     agent {
         dockerfile {
@@ -64,6 +61,9 @@ pipeline {
         stage('Install deps to Cluster') {
             steps {
                 script {
+                    def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
+                    def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
+
                     sh("databricks libraries uninstall --cluster-id ${CLUSTERID} --all")
                     sh("databricks libraries install --cluster-id ${CLUSTERID} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar")
                     sh("databricks libraries install --cluster-id ${CLUSTERID} --jar  s3://pypi.johnsnowlabs.com/${SPARK_NLP_HEALTHCARE_VERSION}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar")

From 772cbcf844b01d01cd7bc1dc04200f14e6b945ba Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 31 May 2022 13:57:47 +0300
Subject: [PATCH 056/113] Updated jenkinsfile

---
 .ci/Dockerfile.build |  2 +-
 Jenkinsfile          | 32 ++++++++++++++++----------------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/.ci/Dockerfile.build b/.ci/Dockerfile.build
index 5d15c6c..19de535 100644
--- a/.ci/Dockerfile.build
+++ b/.ci/Dockerfile.build
@@ -3,4 +3,4 @@ FROM python:3.7-alpine
 
 ENV LC_ALL=C
 
-RUN pip install databricks-cli requests pytest
\ No newline at end of file
+RUN pip install databricks-cli requests pytest gh
\ No newline at end of file
diff --git a/Jenkinsfile b/Jenkinsfile
index 29c5c1e..580ddd6 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -42,22 +42,6 @@ pipeline {
                 }
             }
         }
-        stage('Start cluster') {
-            steps {
-                script {
-                    sh("databricks clusters start --cluster-id ${CLUSTERID} || true")
-                    timeout(10) {
-                        waitUntil {
-                           script {
-                             def respString = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true
-                             def respJson = readJSON text: respString
-                             return (respJson['state'] == 'RUNNING');
-                           }
-                        }
-                    }
-                }
-            }
-        }
         stage('Install deps to Cluster') {
             steps {
                 script {
@@ -74,6 +58,22 @@ pipeline {
                 }
             }
         }
+        stage('Start cluster') {
+            steps {
+                script {
+                    sh("databricks clusters restart --cluster-id ${CLUSTERID} || true")
+                    timeout(10) {
+                        waitUntil {
+                           script {
+                             def respString = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true
+                             def respJson = readJSON text: respString
+                             return (respJson['state'] == 'RUNNING');
+                           }
+                        }
+                    }
+                }
+            }
+        }
         stage('Run Notebook Tests') {
             steps {
                 script {

From 06e069a16cdf4972a1764641293626823c0cdc03 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 31 May 2022 14:01:03 +0300
Subject: [PATCH 057/113] Updated jenkinsfile

---
 .ci/Dockerfile.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/Dockerfile.build b/.ci/Dockerfile.build
index 19de535..5febfa2 100644
--- a/.ci/Dockerfile.build
+++ b/.ci/Dockerfile.build
@@ -3,4 +3,4 @@ FROM python:3.7-alpine
 
 ENV LC_ALL=C
 
-RUN pip install databricks-cli requests pytest gh
\ No newline at end of file
+RUN pip install databricks-cli requests pytest gh git
\ No newline at end of file

From 44f8ebafa75b5f16aa5af0823a7e5bdf5fc192c6 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 31 May 2022 14:09:43 +0300
Subject: [PATCH 058/113] Updated jenkinsfile

---
 .ci/Dockerfile.build | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.ci/Dockerfile.build b/.ci/Dockerfile.build
index 5febfa2..283f72b 100644
--- a/.ci/Dockerfile.build
+++ b/.ci/Dockerfile.build
@@ -3,4 +3,6 @@ FROM python:3.7-alpine
 
 ENV LC_ALL=C
 
-RUN pip install databricks-cli requests pytest gh git
\ No newline at end of file
+RUN apt-get -y update && apt-get -y install git
+
+RUN pip install databricks-cli requests pytest gh
\ No newline at end of file

From 7e03afd5f60935923293b24c153269445c63ea6a Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 31 May 2022 14:14:01 +0300
Subject: [PATCH 059/113] Updated jenkinsfile

---
 .ci/Dockerfile.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/Dockerfile.build b/.ci/Dockerfile.build
index 283f72b..5d26695 100644
--- a/.ci/Dockerfile.build
+++ b/.ci/Dockerfile.build
@@ -3,6 +3,6 @@ FROM python:3.7-alpine
 
 ENV LC_ALL=C
 
-RUN apt-get -y update && apt-get -y install git
+RUN apk add --no-cache git
 
 RUN pip install databricks-cli requests pytest gh
\ No newline at end of file

From bc97e15684f7bd827027d8b0c0ce66ec30c1f0da Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 31 May 2022 14:21:48 +0300
Subject: [PATCH 060/113] Updated jenkinsfile

---
 .ci/Dockerfile.build | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.ci/Dockerfile.build b/.ci/Dockerfile.build
index 5d26695..5d15c6c 100644
--- a/.ci/Dockerfile.build
+++ b/.ci/Dockerfile.build
@@ -3,6 +3,4 @@ FROM python:3.7-alpine
 
 ENV LC_ALL=C
 
-RUN apk add --no-cache git
-
-RUN pip install databricks-cli requests pytest gh
\ No newline at end of file
+RUN pip install databricks-cli requests pytest
\ No newline at end of file

From da861ef0be74558907e3ea08d16b39297ee7b273 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 31 May 2022 14:26:34 +0300
Subject: [PATCH 061/113] Updated jenkinsfile

---
 Jenkinsfile | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 580ddd6..cc48653 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -14,6 +14,10 @@ def SPARK_NLP_VERSION = "3.4.2"
 def SPARK_NLP_HEALTHCARE_VERSION = "3.4.2"
 def SPARK_OCR_VERSION = "3.12.0"
 
+def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
+def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
+
+
 pipeline {
     agent {
         dockerfile {
@@ -45,9 +49,6 @@ pipeline {
         stage('Install deps to Cluster') {
             steps {
                 script {
-                    def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
-                    def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
-
                     sh("databricks libraries uninstall --cluster-id ${CLUSTERID} --all")
                     sh("databricks libraries install --cluster-id ${CLUSTERID} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar")
                     sh("databricks libraries install --cluster-id ${CLUSTERID} --jar  s3://pypi.johnsnowlabs.com/${SPARK_NLP_HEALTHCARE_VERSION}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar")

From 0b213a3da5c59c3449091d292c994bdaf7cea0f8 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 31 May 2022 14:30:12 +0300
Subject: [PATCH 062/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index cc48653..2b55ddf 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -51,7 +51,7 @@ pipeline {
                 script {
                     sh("databricks libraries uninstall --cluster-id ${CLUSTERID} --all")
                     sh("databricks libraries install --cluster-id ${CLUSTERID} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar")
-                    sh("databricks libraries install --cluster-id ${CLUSTERID} --jar  s3://pypi.johnsnowlabs.com/${SPARK_NLP_HEALTHCARE_VERSION}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar")
+                    sh("databricks libraries install --cluster-id ${CLUSTERID} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar")
                     sh("databricks libraries install --cluster-id ${CLUSTERID} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp_2.12:${SPARK_NLP_VERSION}")
                     sh("databricks libraries install --cluster-id ${CLUSTERID} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+spark30-py3-none-any.whl")
                     sh("databricks libraries install --cluster-id ${CLUSTERID} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl/spark_nlp_jsl-${SPARK_NLP_VERSION}-py3-none-any.whl")

From c0c305941be37a648c91c43ef132c1613ab78434 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 31 May 2022 16:49:20 +0300
Subject: [PATCH 063/113] Updated jenkinsfile

---
 Jenkinsfile | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 2b55ddf..bc9fe4e 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -62,7 +62,13 @@ pipeline {
         stage('Start cluster') {
             steps {
                 script {
-                    sh("databricks clusters restart --cluster-id ${CLUSTERID} || true")
+                    def respString = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true
+                    def respJson = readJSON text: respString
+                    if (respJson['state'] == 'RUNNING') {
+                        sh("databricks clusters restart --cluster-id ${CLUSTERID}")
+                    } else {
+                        sh("databricks clusters start --cluster-id ${CLUSTERID}")
+                    }
                     timeout(10) {
                         waitUntil {
                            script {

From 6d4258dabf8ebbd86c0dc25b61366dc7cb32c735 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Tue, 31 May 2022 16:51:41 +0300
Subject: [PATCH 064/113] Updated jenkinsfile

---
 Jenkinsfile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index bc9fe4e..87e572b 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -72,9 +72,9 @@ pipeline {
                     timeout(10) {
                         waitUntil {
                            script {
-                             def respString = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true
-                             def respJson = readJSON text: respString
-                             return (respJson['state'] == 'RUNNING');
+                             def respStringWait = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true
+                             def respJsonWait = readJSON text: respStringWait
+                             return (respJsonWait['state'] == 'RUNNING');
                            }
                         }
                     }

From ea41871a8ba697f9def111b1dd54c42c110599dc Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 08:52:37 +0300
Subject: [PATCH 065/113] Updated jenkinsfile

---
 Jenkinsfile | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index 87e572b..5cf878f 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -18,6 +18,12 @@ def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(S
 def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 
 
+def runtimeRespString = sh script: "databricks clusters spark-versions", returnStdout: true
+def runtimeRespJson = readJSON text: runtimeRespString
+
+def runtimes = runtimeRespJson['versions'].collect { it['key'] }.join('\n')
+
+
 pipeline {
     agent {
         dockerfile {
@@ -28,6 +34,13 @@ pipeline {
         DATABRICKS_CONFIG_FILE = ".databricks.cfg"
         GITHUB_CREDS = credentials('55e7e818-4ccf-4d23-b54c-fd97c21081ba')
     }
+    parameters {
+        choice(
+            name:'databricks_runtime',
+            choices:runtimes,
+            description:'define spark version'
+        )
+    }
     stages {
         stage('Setup') {
             steps {

From cd957ffb284c329085a23956c1a2645719c10ef7 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 08:53:59 +0300
Subject: [PATCH 066/113] Updated jenkinsfile

---
 Jenkinsfile | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 5cf878f..3a06755 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -17,12 +17,11 @@ def SPARK_OCR_VERSION = "3.12.0"
 def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
 def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 
-
-def runtimeRespString = sh script: "databricks clusters spark-versions", returnStdout: true
-def runtimeRespJson = readJSON text: runtimeRespString
-
-def runtimes = runtimeRespJson['versions'].collect { it['key'] }.join('\n')
-
+node {
+    def runtimeRespString = sh script: "databricks clusters spark-versions", returnStdout: true
+    def runtimeRespJson = readJSON text: runtimeRespString
+    def runtimes = runtimeRespJson['versions'].collect { it['key'] }.join('\n')
+}
 
 pipeline {
     agent {

From 21c5815c8be5a7aa18395ce613934b91f4f73baa Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 08:56:36 +0300
Subject: [PATCH 067/113] Updated jenkinsfile

---
 Jenkinsfile | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 3a06755..0c2a43c 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -17,7 +17,11 @@ def SPARK_OCR_VERSION = "3.12.0"
 def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
 def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 
-node {
+dockerNode {
+    withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
+        sh('echo "${TOKEN}" > secret.txt')
+        sh("databricks configure --token-file secret.txt --host ${DBURL}")
+    }
     def runtimeRespString = sh script: "databricks clusters spark-versions", returnStdout: true
     def runtimeRespJson = readJSON text: runtimeRespString
     def runtimes = runtimeRespJson['versions'].collect { it['key'] }.join('\n')

From c53b92ba312244fc57f975ee2da5b1815ca992a8 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 09:26:05 +0300
Subject: [PATCH 068/113] Updated jenkinsfile

---
 Jenkinsfile | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 0c2a43c..0bb8cce 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -17,15 +17,6 @@ def SPARK_OCR_VERSION = "3.12.0"
 def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
 def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 
-dockerNode {
-    withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
-        sh('echo "${TOKEN}" > secret.txt')
-        sh("databricks configure --token-file secret.txt --host ${DBURL}")
-    }
-    def runtimeRespString = sh script: "databricks clusters spark-versions", returnStdout: true
-    def runtimeRespJson = readJSON text: runtimeRespString
-    def runtimes = runtimeRespJson['versions'].collect { it['key'] }.join('\n')
-}
 
 pipeline {
     agent {
@@ -40,7 +31,7 @@ pipeline {
     parameters {
         choice(
             name:'databricks_runtime',
-            choices:runtimes,
+            choices:'6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n7.3.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12',
             description:'define spark version'
         )
     }

From 4d62a3fc0f440c0079f06b7b9500fd0efd29bebb Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 14:29:20 +0300
Subject: [PATCH 069/113] Updated jenkinsfile

---
 Jenkinsfile | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 0bb8cce..85eea66 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -2,7 +2,7 @@
 
 def DBTOKEN = "DATABRICKS_TOKEN"
 def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com"
-def CLUSTERID = "0428-112519-vaxgi8gx"
+//def CLUSTERID = "0428-112519-vaxgi8gx"
 def SCRIPTPATH = "./.ci"
 def NOTEBOOKPATH = "./databricks/python"
 def WORKSPACEPATH = "/Shared/Spark OCR/tests"
@@ -33,6 +33,7 @@ pipeline {
             name:'databricks_runtime',
             choices:'6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n7.3.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12',
             description:'define spark version'
+            defaultValue: '7.3.x-scala2.12'
         )
     }
     stages {
@@ -53,6 +54,45 @@ pipeline {
                 }
             }
         }
+        stage('Create Cluster') {
+            steps {
+                script {
+                    withCredentials([string(credentialsId:'TEST_SPARK_OCR_LICENSE',variable:'SPARK_OCR_LICENSE'),[
+                        $class: 'AmazonWebServicesCredentialsBinding',
+                        credentialsId: 'a4362e3b-808e-45e0-b7d2-1c62b0572df4',
+                        accessKeyVariable: 'AWS_ACCESS_KEY_ID',
+                        secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) {
+                        def jsonCluster = '''
+                        {
+                              "num_workers": 1,
+                              "cluster_name": "Spark Ocr Notebook Test",
+                              "spark_version": "${databricks_runtime}",
+                              "spark_conf": {
+                                  "spark.sql.legacy.allowUntypedScalaUDF": "true"
+                              },
+                              "aws_attributes": {
+                                  "first_on_demand": 1,
+                                  "availability": "SPOT_WITH_FALLBACK",
+                                  "zone_id": "us-west-2a",
+                                  "spot_bid_price_percent": 100,
+                                  "ebs_volume_count": 0
+                              },
+                              "node_type_id": "i3.xlarge",
+                              "driver_node_type_id": "i3.xlarge",
+                              "spark_env_vars": {
+                                  "JSL_OCR_LICENSE": "${SPARK_OCR_LICENSE}",
+                                  "AWS_ACCESS_KEY_ID": "${AWS_ACCESS_KEY_ID}",
+                                  "AWS_SECRET_ACCESS_KEY": "${AWS_SECRET_ACCESS_KEY}"
+                              },
+                              "autotermination_minutes": 20,
+                        }
+                        '''
+                        def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}")
+                        def CLUSTERID = (readJSON text: clusterRespString)['cluster_id']
+                    }
+                }
+            }
+        }
         stage('Install deps to Cluster') {
             steps {
                 script {

From b1b376b55fb51c25a43f233225de818872f3422d Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 14:33:16 +0300
Subject: [PATCH 070/113] Updated jenkinsfile

---
 Jenkinsfile | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 85eea66..84dd1b6 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -32,7 +32,7 @@ pipeline {
         choice(
             name:'databricks_runtime',
             choices:'6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n7.3.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12',
-            description:'define spark version'
+            description:'define spark version',
             defaultValue: '7.3.x-scala2.12'
         )
     }
@@ -64,27 +64,27 @@ pipeline {
                         secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) {
                         def jsonCluster = '''
                         {
-                              "num_workers": 1,
-                              "cluster_name": "Spark Ocr Notebook Test",
-                              "spark_version": "${databricks_runtime}",
-                              "spark_conf": {
-                                  "spark.sql.legacy.allowUntypedScalaUDF": "true"
-                              },
-                              "aws_attributes": {
-                                  "first_on_demand": 1,
-                                  "availability": "SPOT_WITH_FALLBACK",
-                                  "zone_id": "us-west-2a",
-                                  "spot_bid_price_percent": 100,
-                                  "ebs_volume_count": 0
-                              },
-                              "node_type_id": "i3.xlarge",
-                              "driver_node_type_id": "i3.xlarge",
-                              "spark_env_vars": {
-                                  "JSL_OCR_LICENSE": "${SPARK_OCR_LICENSE}",
-                                  "AWS_ACCESS_KEY_ID": "${AWS_ACCESS_KEY_ID}",
-                                  "AWS_SECRET_ACCESS_KEY": "${AWS_SECRET_ACCESS_KEY}"
-                              },
-                              "autotermination_minutes": 20,
+                            "num_workers": 1,
+                            "cluster_name": "Spark Ocr Notebook Test",
+                            "spark_version": "${databricks_runtime}",
+                            "spark_conf": {
+                              "spark.sql.legacy.allowUntypedScalaUDF": "true"
+                            },
+                            "aws_attributes": {
+                              "first_on_demand": 1,
+                              "availability": "SPOT_WITH_FALLBACK",
+                              "zone_id": "us-west-2a",
+                              "spot_bid_price_percent": 100,
+                              "ebs_volume_count": 0
+                            },
+                            "node_type_id": "i3.xlarge",
+                            "driver_node_type_id": "i3.xlarge",
+                            "spark_env_vars": {
+                              "JSL_OCR_LICENSE": "${SPARK_OCR_LICENSE}",
+                              "AWS_ACCESS_KEY_ID": "${AWS_ACCESS_KEY_ID}",
+                              "AWS_SECRET_ACCESS_KEY": "${AWS_SECRET_ACCESS_KEY}"
+                            },
+                            "autotermination_minutes": 20,
                         }
                         '''
                         def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}")

From dcc1719048c687e44b24b8e7e65fd238b468cf6e Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 14:34:53 +0300
Subject: [PATCH 071/113] Updated jenkinsfile

---
 Jenkinsfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index 84dd1b6..0cbd2b6 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -89,6 +89,7 @@ pipeline {
                         '''
                         def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}")
                         def CLUSTERID = (readJSON text: clusterRespString)['cluster_id']
+                        }
                     }
                 }
             }

From b5342478ebb0b9b6c6c8b376c3e4bd1a88ff92fc Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 14:36:10 +0300
Subject: [PATCH 072/113] Updated jenkinsfile

---
 Jenkinsfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 0cbd2b6..529620b 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -88,7 +88,8 @@ pipeline {
                         }
                         '''
                         def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}")
-                        def CLUSTERID = (readJSON text: clusterRespString)['cluster_id']
+                        def clusterRespJson = readJSON text: clusterRespString
+                        def CLUSTERID = clusterRespJson['cluster_id']
                         }
                     }
                 }

From d88234eeff56288211f2f543c9b3bf10b98ef5bf Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 14:38:41 +0300
Subject: [PATCH 073/113] Updated jenkinsfile

---
 Jenkinsfile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 529620b..b6a3a33 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -90,7 +90,6 @@ pipeline {
                         def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}")
                         def clusterRespJson = readJSON text: clusterRespString
                         def CLUSTERID = clusterRespJson['cluster_id']
-                        }
                     }
                 }
             }

From f4db299ab7175c9054be760c2587da3a08ca7875 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 14:49:27 +0300
Subject: [PATCH 074/113] Updated jenkinsfile

---
 Jenkinsfile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index b6a3a33..92aee7b 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -17,6 +17,8 @@ def SPARK_OCR_VERSION = "3.12.0"
 def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
 def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 
+def databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime
+
 
 pipeline {
     agent {

From 8f52b6ed075697a20eb11b99a1c98c280156b885 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 14:50:03 +0300
Subject: [PATCH 075/113] Updated jenkinsfile

---
 Jenkinsfile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 92aee7b..27d1232 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -34,8 +34,7 @@ pipeline {
         choice(
             name:'databricks_runtime',
             choices:'6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n7.3.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12',
-            description:'define spark version',
-            defaultValue: '7.3.x-scala2.12'
+            description:'define spark version'
         )
     }
     stages {

From 206628909ccc87f1929cd92f1bde30d455e3a7d6 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 14:54:15 +0300
Subject: [PATCH 076/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 27d1232..5fa51b0 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -58,7 +58,7 @@ pipeline {
         stage('Create Cluster') {
             steps {
                 script {
-                    withCredentials([string(credentialsId:'TEST_SPARK_OCR_LICENSE',variable:'SPARK_OCR_LICENSE'),[
+                    withCredentials([string(credentialsId:'TEST_SPARK_NLP_LICENSE',variable:'SPARK_OCR_LICENSE'),[
                         $class: 'AmazonWebServicesCredentialsBinding',
                         credentialsId: 'a4362e3b-808e-45e0-b7d2-1c62b0572df4',
                         accessKeyVariable: 'AWS_ACCESS_KEY_ID',

From 1900ad2eaff9b1f8e991278f5e10a9172d961ea8 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 15:04:52 +0300
Subject: [PATCH 077/113] Updated jenkinsfile

---
 Jenkinsfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index 5fa51b0..89f8ecb 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -88,6 +88,7 @@ pipeline {
                             "autotermination_minutes": 20,
                         }
                         '''
+                        echo "${jsonCluster}"
                         def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}")
                         def clusterRespJson = readJSON text: clusterRespString
                         def CLUSTERID = clusterRespJson['cluster_id']

From f99d57e73034611ac48a3d075d8658647d9c133c Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 15:15:04 +0300
Subject: [PATCH 078/113] Updated jenkinsfile

---
 Jenkinsfile | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 89f8ecb..5edc5d3 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,5 +1,7 @@
 @Library('jenkinslib')_
 
+databricks_runtime = ""
+
 def DBTOKEN = "DATABRICKS_TOKEN"
 def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com"
 //def CLUSTERID = "0428-112519-vaxgi8gx"
@@ -17,7 +19,7 @@ def SPARK_OCR_VERSION = "3.12.0"
 def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
 def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 
-def databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime
+databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime
 
 
 pipeline {
@@ -63,7 +65,7 @@ pipeline {
                         credentialsId: 'a4362e3b-808e-45e0-b7d2-1c62b0572df4',
                         accessKeyVariable: 'AWS_ACCESS_KEY_ID',
                         secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) {
-                        def jsonCluster = '''
+                        def jsonCluster = """
                         {
                             "num_workers": 1,
                             "cluster_name": "Spark Ocr Notebook Test",
@@ -87,7 +89,7 @@ pipeline {
                             },
                             "autotermination_minutes": 20,
                         }
-                        '''
+                        """
                         echo "${jsonCluster}"
                         def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}")
                         def clusterRespJson = readJSON text: clusterRespString

From 3735bcebe8c7d764434138be529c58a59de3bf7a Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 15:40:15 +0300
Subject: [PATCH 079/113] Updated jenkinsfile

---
 Jenkinsfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 5edc5d3..f17d4be 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -90,8 +90,8 @@ pipeline {
                             "autotermination_minutes": 20,
                         }
                         """
-                        echo "${jsonCluster}"
-                        def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json ${jsonCluster}")
+                        sh('echo "${jsonCluster}" > cluster.json')
+                        def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json-file cluster.json")
                         def clusterRespJson = readJSON text: clusterRespString
                         def CLUSTERID = clusterRespJson['cluster_id']
                     }

From cb93a6e40d94b6e926c82338e5414bf43f843fb4 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 15:44:47 +0300
Subject: [PATCH 080/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index f17d4be..81df998 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -90,7 +90,7 @@ pipeline {
                             "autotermination_minutes": 20,
                         }
                         """
-                        sh('echo "${jsonCluster}" > cluster.json')
+                        writeFile file: 'cluster.json', text: jsonCluster
                         def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json-file cluster.json")
                         def clusterRespJson = readJSON text: clusterRespString
                         def CLUSTERID = clusterRespJson['cluster_id']

From b472d9dcaf2a1e94978d4ecc8e04becc0ad45dd6 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 15:47:09 +0300
Subject: [PATCH 081/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 81df998..92ee31b 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -87,7 +87,7 @@ pipeline {
                               "AWS_ACCESS_KEY_ID": "${AWS_ACCESS_KEY_ID}",
                               "AWS_SECRET_ACCESS_KEY": "${AWS_SECRET_ACCESS_KEY}"
                             },
-                            "autotermination_minutes": 20,
+                            "autotermination_minutes": 20
                         }
                         """
                         writeFile file: 'cluster.json', text: jsonCluster

From 776a83a22817366b70a22b8c8c42756a7df48046 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 15:49:53 +0300
Subject: [PATCH 082/113] Updated jenkinsfile

---
 Jenkinsfile | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 92ee31b..09c2a7d 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,10 +1,11 @@
 @Library('jenkinslib')_
 
 databricks_runtime = ""
+cluster_id = ""
 
 def DBTOKEN = "DATABRICKS_TOKEN"
 def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com"
-//def CLUSTERID = "0428-112519-vaxgi8gx"
+//def cluster_id = "0428-112519-vaxgi8gx"
 def SCRIPTPATH = "./.ci"
 def NOTEBOOKPATH = "./databricks/python"
 def WORKSPACEPATH = "/Shared/Spark OCR/tests"
@@ -93,7 +94,7 @@ pipeline {
                         writeFile file: 'cluster.json', text: jsonCluster
                         def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json-file cluster.json")
                         def clusterRespJson = readJSON text: clusterRespString
-                        def CLUSTERID = clusterRespJson['cluster_id']
+                        cluster_id = clusterRespJson['cluster_id']
                     }
                 }
             }
@@ -101,30 +102,30 @@ pipeline {
         stage('Install deps to Cluster') {
             steps {
                 script {
-                    sh("databricks libraries uninstall --cluster-id ${CLUSTERID} --all")
-                    sh("databricks libraries install --cluster-id ${CLUSTERID} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar")
-                    sh("databricks libraries install --cluster-id ${CLUSTERID} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar")
-                    sh("databricks libraries install --cluster-id ${CLUSTERID} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp_2.12:${SPARK_NLP_VERSION}")
-                    sh("databricks libraries install --cluster-id ${CLUSTERID} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+spark30-py3-none-any.whl")
-                    sh("databricks libraries install --cluster-id ${CLUSTERID} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl/spark_nlp_jsl-${SPARK_NLP_VERSION}-py3-none-any.whl")
-                    sh("databricks libraries install --cluster-id ${CLUSTERID} --pypi-package spark-nlp==${SPARK_NLP_VERSION}")
+                    sh("databricks libraries uninstall --cluster-id ${cluster_id} --all")
+                    sh("databricks libraries install --cluster-id ${cluster_id} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar")
+                    sh("databricks libraries install --cluster-id ${cluster_id} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar")
+                    sh("databricks libraries install --cluster-id ${cluster_id} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp_2.12:${SPARK_NLP_VERSION}")
+                    sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+spark30-py3-none-any.whl")
+                    sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl/spark_nlp_jsl-${SPARK_NLP_VERSION}-py3-none-any.whl")
+                    sh("databricks libraries install --cluster-id ${cluster_id} --pypi-package spark-nlp==${SPARK_NLP_VERSION}")
                 }
             }
         }
         stage('Start cluster') {
             steps {
                 script {
-                    def respString = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true
+                    def respString = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true
                     def respJson = readJSON text: respString
                     if (respJson['state'] == 'RUNNING') {
-                        sh("databricks clusters restart --cluster-id ${CLUSTERID}")
+                        sh("databricks clusters restart --cluster-id ${cluster_id}")
                     } else {
-                        sh("databricks clusters start --cluster-id ${CLUSTERID}")
+                        sh("databricks clusters start --cluster-id ${cluster_id}")
                     }
                     timeout(10) {
                         waitUntil {
                            script {
-                             def respStringWait = sh script: "databricks clusters get --cluster-id ${CLUSTERID}", returnStdout: true
+                             def respStringWait = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true
                              def respJsonWait = readJSON text: respStringWait
                              return (respJsonWait['state'] == 'RUNNING');
                            }
@@ -139,7 +140,7 @@ pipeline {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
                         sh """python3 $SCRIPTPATH/executenotebook.py --workspace=$DBURL\
                                         --token=$TOKEN\
-                                        --clusterid=$CLUSTERID\
+                                        --clusterid=$cluster_id\
                                         --localpath=${NOTEBOOKPATH}\
                                         --workspacepath='${WORKSPACEPATH}'\
                                         --outfilepath='${OUTFILEPATH}'\

From 2a1e5b489dfcf80548329883e516e3d253c5d06b Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 15:52:51 +0300
Subject: [PATCH 083/113] Updated jenkinsfile

---
 Jenkinsfile | 48 ++++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 09c2a7d..9183d8c 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -36,7 +36,7 @@ pipeline {
     parameters {
         choice(
             name:'databricks_runtime',
-            choices:'6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n7.3.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12',
+            choices:'7.3.x-scala2.12\n6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12',
             description:'define spark version'
         )
     }
@@ -102,7 +102,7 @@ pipeline {
         stage('Install deps to Cluster') {
             steps {
                 script {
-                    sh("databricks libraries uninstall --cluster-id ${cluster_id} --all")
+                    //sh("databricks libraries uninstall --cluster-id ${cluster_id} --all")
                     sh("databricks libraries install --cluster-id ${cluster_id} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar")
                     sh("databricks libraries install --cluster-id ${cluster_id} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar")
                     sh("databricks libraries install --cluster-id ${cluster_id} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp_2.12:${SPARK_NLP_VERSION}")
@@ -112,28 +112,28 @@ pipeline {
                 }
             }
         }
-        stage('Start cluster') {
-            steps {
-                script {
-                    def respString = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true
-                    def respJson = readJSON text: respString
-                    if (respJson['state'] == 'RUNNING') {
-                        sh("databricks clusters restart --cluster-id ${cluster_id}")
-                    } else {
-                        sh("databricks clusters start --cluster-id ${cluster_id}")
-                    }
-                    timeout(10) {
-                        waitUntil {
-                           script {
-                             def respStringWait = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true
-                             def respJsonWait = readJSON text: respStringWait
-                             return (respJsonWait['state'] == 'RUNNING');
-                           }
-                        }
-                    }
-                }
-            }
-        }
+//         stage('Start cluster') {
+//             steps {
+//                 script {
+//                     def respString = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true
+//                     def respJson = readJSON text: respString
+//                     if (respJson['state'] == 'RUNNING') {
+//                         sh("databricks clusters restart --cluster-id ${cluster_id}")
+//                     } else {
+//                         sh("databricks clusters start --cluster-id ${cluster_id}")
+//                     }
+//                     timeout(10) {
+//                         waitUntil {
+//                            script {
+//                              def respStringWait = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true
+//                              def respJsonWait = readJSON text: respStringWait
+//                              return (respJsonWait['state'] == 'RUNNING');
+//                            }
+//                         }
+//                     }
+//                 }
+//             }
+//         }
         stage('Run Notebook Tests') {
             steps {
                 script {

From 4eac3acf6d651946bd2bc581678266c078b8bdf2 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 18:55:59 +0300
Subject: [PATCH 084/113] Updated jenkinsfile

---
 Jenkinsfile | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index 9183d8c..38e78ba 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -109,6 +109,15 @@ pipeline {
                     sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+spark30-py3-none-any.whl")
                     sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl/spark_nlp_jsl-${SPARK_NLP_VERSION}-py3-none-any.whl")
                     sh("databricks libraries install --cluster-id ${cluster_id} --pypi-package spark-nlp==${SPARK_NLP_VERSION}")
+                    timeout(10) {
+                        waitUntil {
+                           script {
+                             def respStringWaitLib = sh script: "databricks libraries cluster-status --cluster-id ${cluster_id}", returnStdout: true
+                             def respJsonWaitLib = readJSON text: respStringWaitLib
+                             return (respJsonWaitLib['library_statuses'].every{ it['status'] == 'INSTALLED'} );
+                           }
+                        }
+                    }
                 }
             }
         }

From 576922fd0a8e14dfd5853414c2cb277508ac5a0b Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 20:19:01 +0300
Subject: [PATCH 085/113] Updated jenkinsfile

---
 .ci/evaluatenotebookruns.py | 2 ++
 Jenkinsfile                 | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/.ci/evaluatenotebookruns.py b/.ci/evaluatenotebookruns.py
index ddc0d99..e71fe26 100644
--- a/.ci/evaluatenotebookruns.py
+++ b/.ci/evaluatenotebookruns.py
@@ -34,7 +34,9 @@ def test_job_run(self):
 
         for filename in glob.glob(os.path.join(path, '*.json')):
             logging.info('Evaluating: ' + filename)
+            print('Evaluating: ' + filename)
             data = json.load(open(filename))
+            print(data)
             if data['state']['life_cycle_state'] == "RUNNING":
                 statuses.append('NOT_COMPLETED')
             else:
diff --git a/Jenkinsfile b/Jenkinsfile
index 38e78ba..54b3e7d 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -149,15 +149,17 @@ pipeline {
                     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
                         sh """python3 $SCRIPTPATH/executenotebook.py --workspace=$DBURL\
                                         --token=$TOKEN\
-                                        --clusterid=$cluster_id\
+                                        --clusterid=${cluster_id}\
                                         --localpath=${NOTEBOOKPATH}\
                                         --workspacepath='${WORKSPACEPATH}'\
                                         --outfilepath='${OUTFILEPATH}'\
                                         --ignore='${IGNORE}'
                            """
+                        sh "ls *.json"
                         sh """sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py
                               python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py
                            """
+
                     }
                 }
             }

From efde4e3b8d38a788eb66b6c679d38eaceac5b0a9 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Wed, 1 Jun 2022 21:13:13 +0300
Subject: [PATCH 086/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 54b3e7d..2fc57d3 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -95,6 +95,7 @@ pipeline {
                         def clusterRespString = sh(returnStdout: true, script: "databricks clusters create --json-file cluster.json")
                         def clusterRespJson = readJSON text: clusterRespString
                         cluster_id = clusterRespJson['cluster_id']
+                        sh "rm cluster.json"
                     }
                 }
             }
@@ -155,7 +156,6 @@ pipeline {
                                         --outfilepath='${OUTFILEPATH}'\
                                         --ignore='${IGNORE}'
                            """
-                        sh "ls *.json"
                         sh """sed -i -e 's #ENV# ${OUTFILEPATH} g' ${SCRIPTPATH}/evaluatenotebookruns.py
                               python3 -m pytest -s --junit-xml=${TESTRESULTPATH}/TEST-notebookout.xml ${SCRIPTPATH}/evaluatenotebookruns.py
                            """

From 8cd879a1d652691133dbdfb341e45b4c983e38d7 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 08:57:47 +0300
Subject: [PATCH 087/113] Updated jenkinsfile

---
 Jenkinsfile | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index 2fc57d3..6796b61 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -2,6 +2,7 @@
 
 databricks_runtime = ""
 cluster_id = ""
+ocr_versions = ""
 
 def DBTOKEN = "DATABRICKS_TOKEN"
 def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com"
@@ -23,6 +24,10 @@ def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime
 
 
+def sparkOcrVesrionsString = sh(returnStdout: true, script: 'gh api   -H "Accept: application/vnd.github.v3+json" /repos/johnsnowlabs/spark-ocr/releases')
+def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString
+ocr_versions = sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n")
+
 pipeline {
     agent {
         dockerfile {
@@ -40,6 +45,13 @@ pipeline {
             description:'define spark version'
         )
     }
+    parameters {
+        choice(
+            name:'ocr_version',
+            choices: ocr_versions,
+            description:'Spark Ocr Version'
+        )
+    }
     stages {
         stage('Setup') {
             steps {

From c5f802ab62cb2caf3050bd2e50b092ad0c96d9f6 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 08:58:42 +0300
Subject: [PATCH 088/113] Updated jenkinsfile

---
 Jenkinsfile | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 6796b61..e05cbd5 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -43,9 +43,7 @@ pipeline {
             name:'databricks_runtime',
             choices:'7.3.x-scala2.12\n6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12',
             description:'define spark version'
-        )
-    }
-    parameters {
+        ),
         choice(
             name:'ocr_version',
             choices: ocr_versions,

From ecd1dab0749ee4a8d2fa204fc63843da80ac10ec Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 08:59:53 +0300
Subject: [PATCH 089/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index e05cbd5..2f6c401 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -43,7 +43,7 @@ pipeline {
             name:'databricks_runtime',
             choices:'7.3.x-scala2.12\n6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12',
             description:'define spark version'
-        ),
+        )
         choice(
             name:'ocr_version',
             choices: ocr_versions,

From bc18891d1b976e10efcb9536d85251d993df6eb4 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 09:01:22 +0300
Subject: [PATCH 090/113] Updated jenkinsfile

---
 Jenkinsfile | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 2f6c401..77417c2 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -24,9 +24,11 @@ def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime
 
 
-def sparkOcrVesrionsString = sh(returnStdout: true, script: 'gh api   -H "Accept: application/vnd.github.v3+json" /repos/johnsnowlabs/spark-ocr/releases')
-def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString
-ocr_versions = sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n")
+node {
+    def sparkOcrVesrionsString = sh(returnStdout: true, script: 'gh api   -H "Accept: application/vnd.github.v3+json" /repos/johnsnowlabs/spark-ocr/releases')
+    def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString
+    ocr_versions = sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n")
+}
 
 pipeline {
     agent {

From 19f80e22acd3e2af3c83eca9bce61e471c1e28df Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 09:03:16 +0300
Subject: [PATCH 091/113] Updated jenkinsfile

---
 Jenkinsfile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index 77417c2..5ef03d3 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -25,9 +25,13 @@ databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : par
 
 
 node {
+    withCredentials([usernamePassword(credentialsId: '55e7e818-4ccf-4d23-b54c-fd97c21081ba',
+                                                  usernameVariable: 'GITHUB_USER',
+                                                  passwordVariable: 'GITHUB_TOKEN')]) {
     def sparkOcrVesrionsString = sh(returnStdout: true, script: 'gh api   -H "Accept: application/vnd.github.v3+json" /repos/johnsnowlabs/spark-ocr/releases')
     def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString
     ocr_versions = sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n")
+    }
 }
 
 pipeline {

From 4975e8d2ff71e376dac8139387d021263bf54091 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 09:19:04 +0300
Subject: [PATCH 092/113] Updated jenkinsfile

---
 Jenkinsfile | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 5ef03d3..faa0fce 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -3,6 +3,7 @@
 databricks_runtime = ""
 cluster_id = ""
 ocr_versions = ""
+nlp_versions = ""
 
 def DBTOKEN = "DATABRICKS_TOKEN"
 def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com"
@@ -25,12 +26,19 @@ databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : par
 
 
 node {
+
+    def get_releases(repo)
+    {
+        String sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api   -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""")
+        String sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString
+        return sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n")
+    }
     withCredentials([usernamePassword(credentialsId: '55e7e818-4ccf-4d23-b54c-fd97c21081ba',
                                                   usernameVariable: 'GITHUB_USER',
                                                   passwordVariable: 'GITHUB_TOKEN')]) {
-    def sparkOcrVesrionsString = sh(returnStdout: true, script: 'gh api   -H "Accept: application/vnd.github.v3+json" /repos/johnsnowlabs/spark-ocr/releases')
-    def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString
-    ocr_versions = sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n")
+        ocr_versions = get_releases("johnsnowlabs/spark-ocr")
+        nlp_versions = get_releases("johnsnowlabs/spark-nlp")
+
     }
 }
 
@@ -55,6 +63,11 @@ pipeline {
             choices: ocr_versions,
             description:'Spark Ocr Version'
         )
+        choice(
+            name:'nlp_version',
+            choices: nlp_versions,
+            description:'Spark Nlp Version'
+        )
     }
     stages {
         stage('Setup') {

From 117b1514bd0f603eb4025b1ab02a5a0248c6c3cd Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 09:22:09 +0300
Subject: [PATCH 093/113] Updated jenkinsfile

---
 Jenkinsfile | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index faa0fce..ba74520 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -24,15 +24,15 @@ def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 
 databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime
 
+def get_releases(repo)
+{
+    String sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api   -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""")
+    String sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString
+    return sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n")
+}
 
 node {
 
-    def get_releases(repo)
-    {
-        String sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api   -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""")
-        String sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString
-        return sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n")
-    }
     withCredentials([usernamePassword(credentialsId: '55e7e818-4ccf-4d23-b54c-fd97c21081ba',
                                                   usernameVariable: 'GITHUB_USER',
                                                   passwordVariable: 'GITHUB_TOKEN')]) {

From 29d1bf0851bbe1dd401a5b238e8e6b0b07251aae Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 09:24:35 +0300
Subject: [PATCH 094/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index ba74520..0660ff7 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -24,7 +24,7 @@ def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 
 databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime
 
-def get_releases(repo)
+def String get_releases(repo)
 {
     String sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api   -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""")
     String sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString

From c066e7676cb0ec8b0cc6554d5859f35c814e266a Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 09:26:06 +0300
Subject: [PATCH 095/113] Updated jenkinsfile

---
 Jenkinsfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 0660ff7..3e321ff 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -26,8 +26,8 @@ databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : par
 
 def String get_releases(repo)
 {
-    String sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api   -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""")
-    String sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString
+    def sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api   -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""")
+    def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString
     return sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n")
 }
 

From 6c1491e6f5bcee124f4a62810e507d26cf07384b Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 09:29:16 +0300
Subject: [PATCH 096/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 3e321ff..043e9e1 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -28,7 +28,7 @@ def String get_releases(repo)
 {
     def sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api   -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""")
     def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString
-    return sparkOcrVesrionsStringJson.collect{ it['name']}.join("\n")
+    return sparkOcrVesrionsStringJson.collect{ it['tag_name']}.join("\n")
 }
 
 node {

From b31f40774ae0aace8a5e6f203b82268e1cbe297d Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 10:06:31 +0300
Subject: [PATCH 097/113] Updated jenkinsfile

---
 Jenkinsfile | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index 043e9e1..056878f 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -4,6 +4,7 @@ databricks_runtime = ""
 cluster_id = ""
 ocr_versions = ""
 nlp_versions = ""
+nlp_healthcare_versions = ""
 
 def DBTOKEN = "DATABRICKS_TOKEN"
 def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com"
@@ -38,8 +39,14 @@ node {
                                                   passwordVariable: 'GITHUB_TOKEN')]) {
         ocr_versions = get_releases("johnsnowlabs/spark-ocr")
         nlp_versions = get_releases("johnsnowlabs/spark-nlp")
+        nlp_healthcare_versions = get_releases("johnsnowlabs/spark-nlp-internal")
 
     }
+    withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
+
+    def databricks_versions = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN"  -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions')
+    echo(databricks_versions)
+    }
 }
 
 pipeline {

From 6b258e528668d45e6196ee1834070a391ce0ede2 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 10:17:39 +0300
Subject: [PATCH 098/113] Updated jenkinsfile

---
 Jenkinsfile | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 056878f..b459503 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -5,6 +5,7 @@ cluster_id = ""
 ocr_versions = ""
 nlp_versions = ""
 nlp_healthcare_versions = ""
+databricks_versions = ""
 
 def DBTOKEN = "DATABRICKS_TOKEN"
 def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com"
@@ -33,7 +34,6 @@ def String get_releases(repo)
 }
 
 node {
-
     withCredentials([usernamePassword(credentialsId: '55e7e818-4ccf-4d23-b54c-fd97c21081ba',
                                                   usernameVariable: 'GITHUB_USER',
                                                   passwordVariable: 'GITHUB_TOKEN')]) {
@@ -44,8 +44,9 @@ node {
     }
     withCredentials([string(credentialsId: DBTOKEN, variable: 'TOKEN')]) {
 
-    def databricks_versions = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN"  -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions')
-    echo(databricks_versions)
+    def databricksVersionsString = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN"  -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions')
+    def databricksVersionsStringJson = readJSON text: databricksVersionsString
+    databricks_versions = databricksVersionsStringJson['versions'].collect{ it['key']}.join("\n")
     }
 }
 
@@ -62,8 +63,8 @@ pipeline {
     parameters {
         choice(
             name:'databricks_runtime',
-            choices:'7.3.x-scala2.12\n6.4.x-esr-scala2.11\n7.3.x-cpu-ml-scala2.12\n7.3.x-hls-scala2.12\n10.2.x-gpu-ml-scala2.12\n10.5.x-aarch64-scala2.12\n7.3.x-gpu-ml-scala2.12\n10.2.x-aarch64-photon-scala2.12\n10.4.x-cpu-ml-scala2.12\n9.1.x-aarch64-scala2.12\n10.1.x-photon-scala2.12\n9.1.x-photon-scala2.12\n10.4.x-scala2.12\n10.2.x-photon-scala2.12\n10.4.x-photon-scala2.12\n11.0.x-photon-scala2.12\n10.3.x-photon-scala2.12\n10.5.x-photon-scala2.12\n10.1.x-gpu-ml-scala2.12\n9.1.x-scala2.12\n11.0.x-scala2.12\n10.3.x-cpu-ml-scala2.12\n10.3.x-aarch64-photon-scala2.12\n11.0.x-gpu-ml-scala2.12\n10.5.x-aarch64-photon-scala2.12\n10.1.x-cpu-ml-scala2.12\n10.4.x-aarch64-photon-scala2.12\n10.5.x-gpu-ml-scala2.12\napache-spark-2.4.x-esr-scala2.11\n10.1.x-scala2.12\n9.1.x-cpu-ml-scala2.12\n11.0.x-cpu-ml-scala2.12\n10.2.x-aarch64-scala2.12\n10.2.x-scala2.12\n10.2.x-cpu-ml-scala2.12\n11.0.x-aarch64-photon-scala2.12\n10.4.x-aarch64-scala2.12\n11.0.x-aarch64-scala2.12\n10.1.x-aarch64-scala2.12\n9.1.x-gpu-ml-scala2.12\napache-spark-2.4.x-scala2.11\n10.5.x-scala2.12\n10.3.x-scala2.12\n10.3.x-aarch64-scala2.12\n10.5.x-cpu-ml-scala2.12\n10.3.x-gpu-ml-scala2.12\n10.4.x-gpu-ml-scala2.12',
-            description:'define spark version'
+            choices: databricks_versions,
+            description: 'Databricks runtime version'
         )
         choice(
             name:'ocr_version',

From c1b2fb4e30da9e7c33c6719b53f7fe227981926b Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 10:26:20 +0300
Subject: [PATCH 099/113] Updated jenkinsfile

---
 Jenkinsfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index b459503..df65b77 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -24,7 +24,7 @@ def SPARK_OCR_VERSION = "3.12.0"
 def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
 def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 
-databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime
+databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.split('|')[1]
 
 def String get_releases(repo)
 {
@@ -46,7 +46,7 @@ node {
 
     def databricksVersionsString = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN"  -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions')
     def databricksVersionsStringJson = readJSON text: databricksVersionsString
-    databricks_versions = databricksVersionsStringJson['versions'].collect{ it['key']}.join("\n")
+    databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] +"|"+it['key']}.join("\n")
     }
 }
 

From a72afceac495c2703c75e8dfd920308a3fdab449 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 10:33:04 +0300
Subject: [PATCH 100/113] Updated jenkinsfile

---
 Jenkinsfile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index df65b77..80c7bbb 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,6 +1,6 @@
 @Library('jenkinslib')_
 
-databricks_runtime = ""
+databricks_runtime_1 = ""
 cluster_id = ""
 ocr_versions = ""
 nlp_versions = ""
@@ -24,7 +24,7 @@ def SPARK_OCR_VERSION = "3.12.0"
 def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
 def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 
-databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.split('|')[1]
+databricks_runtime_1 = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.split('|')[1]
 
 def String get_releases(repo)
 {
@@ -46,7 +46,7 @@ node {
 
     def databricksVersionsString = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN"  -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions')
     def databricksVersionsStringJson = readJSON text: databricksVersionsString
-    databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] +"|"+it['key']}.join("\n")
+    databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] + " |" + it['key']}.join("\n")
     }
 }
 
@@ -107,7 +107,7 @@ pipeline {
                         {
                             "num_workers": 1,
                             "cluster_name": "Spark Ocr Notebook Test",
-                            "spark_version": "${databricks_runtime}",
+                            "spark_version": "${databricks_runtime_1}",
                             "spark_conf": {
                               "spark.sql.legacy.allowUntypedScalaUDF": "true"
                             },

From 3f54d87b09e856b045598f8ece4877e2a060fe0f Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 10:34:49 +0300
Subject: [PATCH 101/113] Updated jenkinsfile

---
 Jenkinsfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Jenkinsfile b/Jenkinsfile
index 80c7bbb..33f0aff 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -103,6 +103,7 @@ pipeline {
                         credentialsId: 'a4362e3b-808e-45e0-b7d2-1c62b0572df4',
                         accessKeyVariable: 'AWS_ACCESS_KEY_ID',
                         secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) {
+                        echo databricks_runtime_1
                         def jsonCluster = """
                         {
                             "num_workers": 1,

From 61a3114f8f2a34c19ea410ea91d06918a8ada560 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 10:37:47 +0300
Subject: [PATCH 102/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 33f0aff..af59374 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -24,7 +24,7 @@ def SPARK_OCR_VERSION = "3.12.0"
 def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
 def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 
-databricks_runtime_1 = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.split('|')[1]
+databricks_runtime_1 = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1]
 
 def String get_releases(repo)
 {

From 348f021a9f89f12a91c83db4da42a8743ec70766 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 11:25:13 +0300
Subject: [PATCH 103/113] Updated jenkinsfile

---
 Jenkinsfile | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index af59374..16d30fb 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -46,7 +46,7 @@ node {
 
     def databricksVersionsString = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN"  -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions')
     def databricksVersionsStringJson = readJSON text: databricksVersionsString
-    databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] + " |" + it['key']}.join("\n")
+    databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] + " |" + it['key']}.sort().join("\n")
     }
 }
 
@@ -69,12 +69,17 @@ pipeline {
         choice(
             name:'ocr_version',
             choices: ocr_versions,
-            description:'Spark Ocr Version'
+            description:'Spark Ocr version'
         )
         choice(
             name:'nlp_version',
             choices: nlp_versions,
-            description:'Spark Nlp Version'
+            description:'Spark Nlp version'
+        )
+        choice(
+            name:'nlp_healthcare_version',
+            choices: nlp_healthcare_versions,
+            description:'Spark Nlp for Healthcare version'
         )
     }
     stages {

From 0136be3522b94deaf7364cf682a91b6cf1d6225f Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 17:26:16 +0300
Subject: [PATCH 104/113] Updated jenkinsfile

---
 Jenkinsfile | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 16d30fb..9c1b520 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,6 +1,6 @@
 @Library('jenkinslib')_
 
-databricks_runtime_1 = ""
+databricks_runtime = ""
 cluster_id = ""
 ocr_versions = ""
 nlp_versions = ""
@@ -24,7 +24,7 @@ def SPARK_OCR_VERSION = "3.12.0"
 def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
 def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 
-databricks_runtime_1 = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1]
+databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1]
 
 def String get_releases(repo)
 {
@@ -46,7 +46,7 @@ node {
 
     def databricksVersionsString = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN"  -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions')
     def databricksVersionsStringJson = readJSON text: databricksVersionsString
-    databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] + " |" + it['key']}.sort().join("\n")
+    databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] + " |" + it['key']}.sort(false) { it.tokenize(' ')[0] as Integer }.join("\n")
     }
 }
 
@@ -63,7 +63,7 @@ pipeline {
     parameters {
         choice(
             name:'databricks_runtime',
-            choices: databricks_versions,
+            choices: '7.3.x-scala2.12\n' + databricks_versions,
             description: 'Databricks runtime version'
         )
         choice(
@@ -108,12 +108,11 @@ pipeline {
                         credentialsId: 'a4362e3b-808e-45e0-b7d2-1c62b0572df4',
                         accessKeyVariable: 'AWS_ACCESS_KEY_ID',
                         secretKeyVariable: 'AWS_SECRET_ACCESS_KEY']]) {
-                        echo databricks_runtime_1
                         def jsonCluster = """
                         {
                             "num_workers": 1,
                             "cluster_name": "Spark Ocr Notebook Test",
-                            "spark_version": "${databricks_runtime_1}",
+                            "spark_version": "${databricks_runtime}",
                             "spark_conf": {
                               "spark.sql.legacy.allowUntypedScalaUDF": "true"
                             },
@@ -210,6 +209,7 @@ pipeline {
     }
     post {
         always {
+            sh "databricks clusters delete --cluster-id ${cluster_id}"
             sh "find ${OUTFILEPATH} -name '*.json' -exec rm {} +"
             junit allowEmptyResults: true, testResults: "**/reports/junit/*.xml"
         }

From b1dd392d3046a230b03e5199d60d08448b943f05 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Thu, 2 Jun 2022 17:30:48 +0300
Subject: [PATCH 105/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 9c1b520..1c2accf 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -46,7 +46,7 @@ node {
 
     def databricksVersionsString = sh(returnStdout: true, script:'curl --header "Authorization: Bearer $TOKEN"  -X GET https://dbc-6ca13d9d-74bb.cloud.databricks.com/api/2.0/clusters/spark-versions')
     def databricksVersionsStringJson = readJSON text: databricksVersionsString
-    databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] + " |" + it['key']}.sort(false) { it.tokenize(' ')[0] as Integer }.join("\n")
+    databricks_versions = databricksVersionsStringJson['versions'].collect{ it['name'] + " |" + it['key']}.sort().join("\n")
     }
 }
 

From 6f2e931665d10d1addcd603b761ef564f31580ba Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 3 Jun 2022 12:57:30 +0300
Subject: [PATCH 106/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 1c2accf..e1c5010 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -63,7 +63,7 @@ pipeline {
     parameters {
         choice(
             name:'databricks_runtime',
-            choices: '7.3.x-scala2.12\n' + databricks_versions,
+            choices: '7.3 LTS Spark 3.0.1 |7.3.x-scala2.12\n' + databricks_versions,
             description: 'Databricks runtime version'
         )
         choice(

From e55cca00381c2bdeba023cab6938f0724185b796 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 3 Jun 2022 13:01:01 +0300
Subject: [PATCH 107/113] Updated jenkinsfile

---
 Jenkinsfile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index e1c5010..4770a3c 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,6 +1,6 @@
 @Library('jenkinslib')_
 
-databricks_runtime = ""
+databricks_runtime_version = ""
 cluster_id = ""
 ocr_versions = ""
 nlp_versions = ""
@@ -24,7 +24,7 @@ def SPARK_OCR_VERSION = "3.12.0"
 def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
 def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 
-databricks_runtime = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1]
+databricks_runtime_version = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1]
 
 def String get_releases(repo)
 {
@@ -112,7 +112,7 @@ pipeline {
                         {
                             "num_workers": 1,
                             "cluster_name": "Spark Ocr Notebook Test",
-                            "spark_version": "${databricks_runtime}",
+                            "spark_version": "${databricks_runtime_version}",
                             "spark_conf": {
                               "spark.sql.legacy.allowUntypedScalaUDF": "true"
                             },

From 4e0bb937e7c14be3b6ac16ab29684ce11ae4ebaa Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 3 Jun 2022 13:20:09 +0300
Subject: [PATCH 108/113] Updated jenkinsfile

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 4770a3c..b079fb7 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -28,7 +28,7 @@ databricks_runtime_version = params.databricks_runtime == null ? '7.3.x-scala2.1
 
 def String get_releases(repo)
 {
-    def sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api   -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""")
+    def sparkOcrVesrionsString = sh(returnStdout: true, script: """gh api --paginate  -H "Accept: application/vnd.github.v3+json" /repos/${repo}/releases""")
     def sparkOcrVesrionsStringJson = readJSON text: sparkOcrVesrionsString
     return sparkOcrVesrionsStringJson.collect{ it['tag_name']}.join("\n")
 }

From 7b9b0b1fafc842fe45d7e690c402cb25b02e600a Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 3 Jun 2022 14:33:59 +0300
Subject: [PATCH 109/113] Updated jenkinsfile

---
 Jenkinsfile | 42 +++++++++++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index b079fb7..2101195 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -6,10 +6,10 @@ ocr_versions = ""
 nlp_versions = ""
 nlp_healthcare_versions = ""
 databricks_versions = ""
+nlp_version_prefix = ""
 
 def DBTOKEN = "DATABRICKS_TOKEN"
 def DBURL = "https://dbc-6ca13d9d-74bb.cloud.databricks.com"
-//def cluster_id = "0428-112519-vaxgi8gx"
 def SCRIPTPATH = "./.ci"
 def NOTEBOOKPATH = "./databricks/python"
 def WORKSPACEPATH = "/Shared/Spark OCR/tests"
@@ -17,14 +17,22 @@ def OUTFILEPATH = "."
 def TESTRESULTPATH = "./reports/junit"
 def IGNORE = "3. Compare CPU and GPU image processing with Spark OCR.ipynb"
 
-def SPARK_NLP_VERSION = "3.4.2"
-def SPARK_NLP_HEALTHCARE_VERSION = "3.4.2"
-def SPARK_OCR_VERSION = "3.12.0"
+databricks_runtime_version = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1]
 
-def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
-def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 
-databricks_runtime_version = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1]
+switch(spark_version) {
+case 'spark24':
+    nlp_version_prefix="-spark24"
+    break
+case 'spark23':
+    nlp_version_prefix="-spark23"
+    break
+case 'spark30':
+    nlp_version_prefix=""
+    break
+case 'spark32':
+    nlp_version_prefix="-spark32"
+}
 
 def String get_releases(repo)
 {
@@ -71,6 +79,11 @@ pipeline {
             choices: ocr_versions,
             description:'Spark Ocr version'
         )
+        choice(
+            name:'spark_version',
+            choices:'spark30\nspark32\nspark24\nspark23',
+            description:'define spark version'
+            )
         choice(
             name:'nlp_version',
             choices: nlp_versions,
@@ -145,11 +158,18 @@ pipeline {
         stage('Install deps to Cluster') {
             steps {
                 script {
+                    def SPARK_NLP_VERSION = params.nlp_version
+                    def SPARK_NLP_HEALTHCARE_VERSION = params.nlp_healthcare_version
+                    def SPARK_OCR_VERSION = params.ocr_version
+
+                    def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
+                    def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
+
                     //sh("databricks libraries uninstall --cluster-id ${cluster_id} --all")
-                    sh("databricks libraries install --cluster-id ${cluster_id} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-spark30.jar")
-                    sh("databricks libraries install --cluster-id ${cluster_id} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}.jar")
-                    sh("databricks libraries install --cluster-id ${cluster_id} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp_2.12:${SPARK_NLP_VERSION}")
-                    sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+spark30-py3-none-any.whl")
+                    sh("databricks libraries install --cluster-id ${cluster_id} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-${spark_version}.jar")
+                    sh("databricks libraries install --cluster-id ${cluster_id} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}${nlp_version_prefix}.jar")
+                    sh("databricks libraries install --cluster-id ${cluster_id} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp${nlp_version_prefix}_2.12:${SPARK_NLP_VERSION}")
+                    sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+${spark_version}-py3-none-any.whl")
                     sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl/spark_nlp_jsl-${SPARK_NLP_VERSION}-py3-none-any.whl")
                     sh("databricks libraries install --cluster-id ${cluster_id} --pypi-package spark-nlp==${SPARK_NLP_VERSION}")
                     timeout(10) {

From b68cee2036391834e5970461df99a624f676db38 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 3 Jun 2022 14:36:03 +0300
Subject: [PATCH 110/113] Jenkinsfile: define spark_version with spark30 default

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 2101195..e0ae489 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -18,7 +18,7 @@ def TESTRESULTPATH = "./reports/junit"
 def IGNORE = "3. Compare CPU and GPU image processing with Spark OCR.ipynb"
 
 databricks_runtime_version = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1]
-
+def spark_version = params.spark_version == null ? 'spark30' : params.spark_version
 
 switch(spark_version) {
 case 'spark24':

From ee9a3a8a17087d7c56dfce4adfa036b6c8ed4302 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 3 Jun 2022 14:41:11 +0300
Subject: [PATCH 111/113] Jenkinsfile: move version and secret definitions to script top level

---
 Jenkinsfile | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index e0ae489..9197980 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -17,6 +17,13 @@ def OUTFILEPATH = "."
 def TESTRESULTPATH = "./reports/junit"
 def IGNORE = "3. Compare CPU and GPU image processing with Spark OCR.ipynb"
 
+def SPARK_NLP_VERSION = params.nlp_version
+def SPARK_NLP_HEALTHCARE_VERSION = params.nlp_healthcare_version
+def SPARK_OCR_VERSION = params.ocr_version
+
+def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
+def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
+
 databricks_runtime_version = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1]
 def spark_version = params.spark_version == null ? 'spark30' : params.spark_version
 
@@ -158,13 +165,6 @@ pipeline {
         stage('Install deps to Cluster') {
             steps {
                 script {
-                    def SPARK_NLP_VERSION = params.nlp_version
-                    def SPARK_NLP_HEALTHCARE_VERSION = params.nlp_healthcare_version
-                    def SPARK_OCR_VERSION = params.ocr_version
-
-                    def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
-                    def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
-
                     //sh("databricks libraries uninstall --cluster-id ${cluster_id} --all")
                     sh("databricks libraries install --cluster-id ${cluster_id} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-${spark_version}.jar")
                     sh("databricks libraries install --cluster-id ${cluster_id} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}${nlp_version_prefix}.jar")

From 0a9004bfa0fc45ea334be9a39e95bd0fd3122987 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 3 Jun 2022 15:07:59 +0300
Subject: [PATCH 112/113] Jenkinsfile: permanently delete test cluster in post step

---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 9197980..24900aa 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -229,7 +229,7 @@ pipeline {
     }
     post {
         always {
-            sh "databricks clusters delete --cluster-id ${cluster_id}"
+            sh "databricks clusters permanent-delete --cluster-id ${cluster_id}"
             sh "find ${OUTFILEPATH} -name '*.json' -exec rm {} +"
             junit allowEmptyResults: true, testResults: "**/reports/junit/*.xml"
         }

From 7fa5bd1d4d86a6d48ac8ab5b19da53a20cfb2075 Mon Sep 17 00:00:00 2001
From: Mykola Melnyk <kolia1985@gmail.com>
Date: Fri, 3 Jun 2022 15:23:07 +0300
Subject: [PATCH 113/113] Jenkinsfile: rename version variables to upper case and drop commented-out code

---
 Jenkinsfile | 36 ++++++------------------------------
 1 file changed, 6 insertions(+), 30 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 24900aa..2d5df80 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,6 +1,5 @@
 @Library('jenkinslib')_
 
-databricks_runtime_version = ""
 cluster_id = ""
 ocr_versions = ""
 nlp_versions = ""
@@ -24,10 +23,10 @@ def SPARK_OCR_VERSION = params.ocr_version
 def PYPI_REPO_HEALTHCARE_SECRET = sparknlp_helpers.spark_nlp_healthcare_secret(SPARK_NLP_HEALTHCARE_VERSION)
 def PYPI_REPO_OCR_SECRET = sparknlp_helpers.spark_ocr_secret(SPARK_OCR_VERSION)
 
-databricks_runtime_version = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1]
-def spark_version = params.spark_version == null ? 'spark30' : params.spark_version
+def DATABRICKS_RUNTIME_VERSION = params.databricks_runtime == null ? '7.3.x-scala2.12' : params.databricks_runtime.tokenize('|')[1]
+def SPARK_VERSION = params.spark_version == null ? 'spark30' : params.spark_version
 
-switch(spark_version) {
+switch(SPARK_VERSION) {
 case 'spark24':
     nlp_version_prefix="-spark24"
     break
@@ -132,7 +131,7 @@ pipeline {
                         {
                             "num_workers": 1,
                             "cluster_name": "Spark Ocr Notebook Test",
-                            "spark_version": "${databricks_runtime_version}",
+                            "spark_version": "${DATABRICKS_RUNTIME_VERSION}",
                             "spark_conf": {
                               "spark.sql.legacy.allowUntypedScalaUDF": "true"
                             },
@@ -165,11 +164,10 @@ pipeline {
         stage('Install deps to Cluster') {
             steps {
                 script {
-                    //sh("databricks libraries uninstall --cluster-id ${cluster_id} --all")
-                    sh("databricks libraries install --cluster-id ${cluster_id} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-${spark_version}.jar")
+                    sh("databricks libraries install --cluster-id ${cluster_id} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/jars/spark-ocr-assembly-${SPARK_OCR_VERSION}-${SPARK_VERSION}.jar")
                     sh("databricks libraries install --cluster-id ${cluster_id} --jar  s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl-${SPARK_NLP_HEALTHCARE_VERSION}${nlp_version_prefix}.jar")
                     sh("databricks libraries install --cluster-id ${cluster_id} --maven-coordinates com.johnsnowlabs.nlp:spark-nlp${nlp_version_prefix}_2.12:${SPARK_NLP_VERSION}")
-                    sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+${spark_version}-py3-none-any.whl")
+                    sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_OCR_SECRET}/spark-ocr/spark_ocr-${SPARK_OCR_VERSION}+${SPARK_VERSION}-py3-none-any.whl")
                     sh("databricks libraries install --cluster-id ${cluster_id} --whl s3://pypi.johnsnowlabs.com/${PYPI_REPO_HEALTHCARE_SECRET}/spark-nlp-jsl/spark_nlp_jsl-${SPARK_NLP_VERSION}-py3-none-any.whl")
                     sh("databricks libraries install --cluster-id ${cluster_id} --pypi-package spark-nlp==${SPARK_NLP_VERSION}")
                     timeout(10) {
@@ -184,28 +182,6 @@ pipeline {
                 }
             }
         }
-//         stage('Start cluster') {
-//             steps {
-//                 script {
-//                     def respString = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true
-//                     def respJson = readJSON text: respString
-//                     if (respJson['state'] == 'RUNNING') {
-//                         sh("databricks clusters restart --cluster-id ${cluster_id}")
-//                     } else {
-//                         sh("databricks clusters start --cluster-id ${cluster_id}")
-//                     }
-//                     timeout(10) {
-//                         waitUntil {
-//                            script {
-//                              def respStringWait = sh script: "databricks clusters get --cluster-id ${cluster_id}", returnStdout: true
-//                              def respJsonWait = readJSON text: respStringWait
-//                              return (respJsonWait['state'] == 'RUNNING');
-//                            }
-//                         }
-//                     }
-//                 }
-//             }
-//         }
         stage('Run Notebook Tests') {
             steps {
                 script {