|
| 1 | +{%- capture title -%} |
| 2 | +GenericREModel |
| 3 | +{%- endcapture -%} |
| 4 | + |
| 5 | + |
| 6 | +{%- capture model -%} |
| 7 | +model |
| 8 | +{%- endcapture -%} |
| 9 | + |
| 10 | +{%- capture model_description -%} |
| 11 | +Instantiated RelationExtractionModel for extracting relationships between any entities. |
| 12 | +This class is not intended to be used directly; please use RelationExtractionModel instead. |
| 13 | +Pairs of entities should be specified using setRelationPairs. |
| 14 | + |
| 15 | + |
| 16 | +{%- endcapture -%} |
| 17 | + |
| 18 | +{%- capture model_input_anno -%} |
| 19 | +WORD_EMBEDDINGS, POS, CHUNK, DEPENDENCY |
| 20 | +{%- endcapture -%} |
| 21 | + |
| 22 | +{%- capture model_output_anno -%} |
| 23 | +CATEGORY |
| 24 | +{%- endcapture -%} |
| 25 | + |
| 26 | +{%- capture model_python_medical -%} |
| 27 | +from johnsnowlabs import nlp, medical |
| 28 | +from pyspark.sql import functions as F |
| 29 | +documenter = nlp.DocumentAssembler()\ |
| 30 | + .setInputCol("text")\ |
| 31 | + .setOutputCol("document") |
| 32 | + |
| 33 | +sentencer = nlp.SentenceDetector()\ |
| 34 | + .setInputCols(["document"])\ |
| 35 | + .setOutputCol("sentences") |
| 36 | + |
| 37 | +tokenizer = nlp.Tokenizer()\ |
| 38 | + .setInputCols(["sentences"])\ |
| 39 | + .setOutputCol("tokens") |
| 40 | + |
| 41 | +words_embedder = nlp.WordEmbeddingsModel()\ |
| 42 | + .pretrained("embeddings_clinical", "en", "clinical/models")\ |
| 43 | + .setInputCols(["sentences", "tokens"])\ |
| 44 | + .setOutputCol("embeddings") |
| 45 | + |
| 46 | +pos_tagger = nlp.PerceptronModel()\ |
| 47 | + .pretrained("pos_clinical", "en", "clinical/models") \ |
| 48 | + .setInputCols(["sentences", "tokens"])\ |
| 49 | + .setOutputCol("pos_tags") |
| 50 | + |
| 51 | +ner_tagger = medical.NerModel()\ |
| 52 | + .pretrained("ner_clinical", "en", "clinical/models")\ |
| 53 | + .setInputCols("sentences", "tokens", "embeddings")\ |
| 54 | + .setOutputCol("ner_tags") |
| 55 | + |
| 56 | +ner_chunker = medical.NerConverterInternal()\ |
| 57 | + .setInputCols(["sentences", "tokens", "ner_tags"])\ |
| 58 | + .setOutputCol("ner_chunks") |
| 59 | + |
| 60 | +dependency_parser = nlp.DependencyParserModel()\ |
| 61 | + .pretrained("dependency_conllu", "en")\ |
| 62 | + .setInputCols(["sentences", "pos_tags", "tokens"])\ |
| 63 | + .setOutputCol("dependencies") |
| 64 | + |
| 65 | +reModel = medical.RelationExtractionModel()\ |
| 66 | + .pretrained("generic_re")\ |
| 67 | + .setInputCols(["embeddings", "pos_tags", "ner_chunks", "dependencies"])\ |
| 68 | + .setOutputCol("relations")\ |
| 69 | + .setRelationPairs(["problem-test", |
| 70 | + "problem-treatment"])\ |
| 71 | + .setMaxSyntacticDistance(4) |
| 72 | + |
| 73 | +pipeline = nlp.Pipeline(stages=[ |
| 74 | + documenter, |
| 75 | + sentencer, |
| 76 | + tokenizer, |
| 77 | + words_embedder, |
| 78 | + pos_tagger, |
| 79 | + ner_tagger, |
| 80 | + ner_chunker, |
| 81 | + dependency_parser, |
| 82 | + reModel |
| 83 | +]) |
| 84 | + |
| 85 | +text = """ |
| 86 | +A 28-year-old female with a history of gestational diabetes mellitus diagnosed eight years prior to |
| 87 | +presentation and subsequent type two diabetes mellitus ( T2DM ), one prior episode of HTG-induced pancreatitis |
| 88 | +three years prior to presentation , associated with an acute hepatitis , and obesity with a body mass index |
| 89 | +( BMI ) of 33.5 kg/m2 , presented with a one-week history of polyuria , polydipsia , poor appetite , and |
| 90 | +vomiting . Two weeks prior to presentation , she was treated with a five-day course of amoxicillin for a |
| 91 | +respiratory tract infection . She was on metformin , glipizide , and dapagliflozin for T2DM and atorvastatin |
| 92 | +and gemfibrozil for HTG . She had been on dapagliflozin for six months at the time of presentation . Physical |
| 93 | +examination on presentation was significant for dry oral mucosa ; significantly , her abdominal examination was |
| 94 | +benign with no tenderness , guarding , or rigidity . |
| 95 | +""" |
| 96 | +df = spark.createDataFrame([[text]]).toDF("text") |
| 97 | +result = pipeline.fit(df).transform(df) |
| 98 | + |
| 99 | +# Show results |
| 100 | +result.select(F.explode(F.arrays_zip( |
| 101 | + result.relations.result, |
| 102 | + result.relations.metadata)).alias("cols"))\ |
| 103 | +.select( |
| 104 | + F.expr("cols['1']['chunk1']").alias("chunk1"), |
| 105 | + F.expr("cols['1']['chunk2']").alias("chunk2"), |
| 106 | + F.expr("cols['1']['entity1']").alias("entity1"), |
| 107 | + F.expr("cols['1']['entity2']").alias("entity2"), |
| 108 | + F.expr("cols['0']").alias("relations"), |
| 109 | + F.expr("cols['1']['confidence']").alias("confidence")).show(5, truncate=False) |
| 110 | + |
| 111 | ++-----------------+-------------+-------+-------+------------+----------+ |
| 112 | +|chunk1 |chunk2 |entity1|entity2|relations |confidence| |
| 113 | ++-----------------+-------------+-------+-------+------------+----------+ |
| 114 | +|obesity |BMI |PROBLEM|TEST |PROBLEM-TEST|1.0 | |
| 115 | +|a body mass index|BMI |PROBLEM|TEST |PROBLEM-TEST|1.0 | |
| 116 | +|BMI |polyuria |TEST |PROBLEM|TEST-PROBLEM|1.0 | |
| 117 | +|BMI |polydipsia |TEST |PROBLEM|TEST-PROBLEM|1.0 | |
| 118 | +|BMI |poor appetite|TEST |PROBLEM|TEST-PROBLEM|1.0 | |
| 119 | ++-----------------+-------------+-------+-------+------------+----------+ |
| 120 | + |
| 121 | +{%- endcapture -%} |
| 122 | + |
| 123 | +{%- capture model_scala_medical -%} |
| 124 | +import spark.implicits._ |
| 125 | + |
| 126 | +val documenter = new DocumentAssembler() |
| 127 | + .setInputCol("text") |
| 128 | + .setOutputCol("document") |
| 129 | + |
| 130 | +val sentencer = new SentenceDetector() |
| 131 | + .setInputCols("document") |
| 132 | + .setOutputCol("sentences") |
| 133 | + |
| 134 | +val tokenizer = new Tokenizer() |
| 135 | + .setInputCols("sentences") |
| 136 | + .setOutputCol("tokens") |
| 137 | + |
| 138 | +val words_embedder = WordEmbeddingsModel.pretrained("embeddings_clinical","en","clinical/models") |
| 139 | + .setInputCols(Array("sentences","tokens")) |
| 140 | + .setOutputCol("embeddings") |
| 141 | + |
| 142 | +val pos_tagger = PerceptronModel.pretrained("pos_clinical","en","clinical/models") |
| 143 | + .setInputCols(Array("sentences","tokens")) |
| 144 | + .setOutputCol("pos_tags") |
| 145 | + |
| 146 | +val ner_tagger = MedicalNerModel.pretrained("ner_clinical","en","clinical/models") |
| 147 | + .setInputCols("sentences","tokens","embeddings") |
| 148 | + .setOutputCol("ner_tags") |
| 149 | + |
| 150 | +val ner_chunker = new NerConverterInternal() |
| 151 | + .setInputCols(Array("sentences","tokens","ner_tags")) |
| 152 | + .setOutputCol("ner_chunks") |
| 153 | + |
| 154 | +val dependency_parser = DependencyParserModel.pretrained("dependency_conllu","en") |
| 155 | + .setInputCols(Array("sentences","pos_tags","tokens")) |
| 156 | + .setOutputCol("dependencies") |
| 157 | + |
| 158 | +val reModel = RelationExtractionModel.pretrained("generic_re") |
| 159 | + .setInputCols(Array("embeddings","pos_tags","ner_chunks","dependencies")) |
| 160 | + .setOutputCol("relations") |
| 161 | + .setRelationPairs(Array("problem-test","problem-treatment")) |
| 162 | + .setMaxSyntacticDistance(4) |
| 163 | + |
| 164 | + |
| 165 | +val pipeline = new Pipeline().setStages(Array( |
| 166 | + documenter, |
| 167 | + sentencer, |
| 168 | + tokenizer, |
| 169 | + words_embedder, |
| 170 | + pos_tagger, |
| 171 | + ner_tagger, |
| 172 | + ner_chunker, |
| 173 | + dependency_parser, |
| 174 | + reModel )) |
| 175 | + |
| 176 | +val text = "A 28-year-old female with a history of gestational diabetes mellitus diagnosed eight years prior to " + |
| 177 | +"presentation and subsequent type two diabetes mellitus ( T2DM ), one prior episode of HTG-induced pancreatitis " + |
| 178 | +"three years prior to presentation , associated with an acute hepatitis , and obesity with a body mass index " + |
| 179 | +"( BMI ) of 33.5 kg/m2 , presented with a one-week history of polyuria , polydipsia , poor appetite , and " + |
| 180 | +"vomiting . Two weeks prior to presentation , she was treated with a five-day course of amoxicillin for a " + |
| 181 | +"respiratory tract infection . She was on metformin , glipizide , and dapagliflozin for T2DM and atorvastatin " + |
| 182 | +"and gemfibrozil for HTG . She had been on dapagliflozin for six months at the time of presentation . Physical " + |
| 183 | +"examination on presentation was significant for dry oral mucosa ; significantly , her abdominal examination was " + |
| 184 | +"benign with no tenderness , guarding , or rigidity." |
| 185 | + |
| 186 | +val df = Seq(text).toDF("text") |
| 187 | +val result = pipeline.fit(df).transform(df) |
| 188 | + |
| 189 | +// Show results |
| 190 | + |
| 191 | ++-----------------+-------------+-------+-------+------------+----------+ |
| 192 | +|chunk1 |chunk2 |entity1|entity2|relations |confidence| |
| 193 | ++-----------------+-------------+-------+-------+------------+----------+ |
| 194 | +|obesity |BMI |PROBLEM|TEST |PROBLEM-TEST|1.0 | |
| 195 | +|a body mass index|BMI |PROBLEM|TEST |PROBLEM-TEST|1.0 | |
| 196 | +|BMI |polyuria |TEST |PROBLEM|TEST-PROBLEM|1.0 | |
| 197 | +|BMI |polydipsia |TEST |PROBLEM|TEST-PROBLEM|1.0 | |
| 198 | +|BMI |poor appetite|TEST |PROBLEM|TEST-PROBLEM|1.0 | |
| 199 | ++-----------------+-------------+-------+-------+------------+----------+ |
| 200 | + |
| 201 | +{%- endcapture -%} |
| 202 | + |
| 203 | + |
| 204 | +{%- capture model_api_link -%} |
| 205 | +[RelationExtractionModel](https://nlp.johnsnowlabs.com/licensed/api/com/johnsnowlabs/nlp/annotators/re/RelationExtractionModel.html) |
| 206 | +{%- endcapture -%} |
| 207 | + |
| 208 | +{%- capture model_python_api_link -%} |
| 209 | +[RelationExtractionModel](https://nlp.johnsnowlabs.com/licensed/api/python/reference/autosummary/sparknlp_jsl/annotator/re/relation_extraction/index.html#sparknlp_jsl.annotator.re.relation_extraction.RelationExtractionModel) |
| 210 | +{%- endcapture -%} |
| 211 | + |
| 212 | +{%- capture model_notebook_link -%} |
| 213 | +[RelationExtractionModelNotebook](https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/Healthcare_MOOC/Spark_NLP_Udemy_MOOC/Healthcare_NLP/RelationExtractionModel.ipynb) |
| 214 | +{%- endcapture -%} |
| 215 | + |
| 216 | + |
| 217 | +{% include templates/licensed_approach_model_medical_fin_leg_template.md |
| 218 | +title=title |
| 219 | +model=model |
| 220 | +model_description=model_description |
| 221 | +model_input_anno=model_input_anno |
| 222 | +model_output_anno=model_output_anno |
| 223 | +model_python_medical=model_python_medical |
| 224 | +model_scala_medical=model_scala_medical |
| 225 | +model_api_link=model_api_link |
| 226 | +model_python_api_link=model_python_api_link |
| 227 | +model_notebook_link=model_notebook_link |
| 228 | +%} |
0 commit comments