2
2
from dataikuapi .utils import DataikuException
3
3
from safe_logger import SafeLogger
4
4
from rest_api_client import RestAPIClient
5
- from dku_utils import get_dku_key_values , get_endpoint_parameters
5
+ from dku_utils import get_dku_key_values , get_endpoint_parameters , parse_keys_for_json , get_value_from_path
6
+ from dku_constants import DKUConstants
7
+ import json
8
+
6
9
7
10
logger = SafeLogger ("api-connect plugin" , forbiden_keys = ["token" , "password" ])
8
11
@@ -18,10 +21,11 @@ def __init__(self, config, plugin_config):
18
21
custom_key_values = get_dku_key_values (config .get ("custom_key_values" , {}))
19
22
self .client = RestAPIClient (credential , endpoint_parameters , custom_key_values )
20
23
extraction_key = endpoint_parameters .get ("extraction_key" , None )
21
- if extraction_key == '' :
22
- extraction_key = None
23
- self .extraction_key = extraction_key
24
+ self .extraction_key = extraction_key or ''
25
+ self .extraction_path = self .extraction_key .split ('.' )
24
26
self .raw_output = endpoint_parameters .get ("raw_output" , None )
27
+ self .maximum_number_rows = config .get ("maximum_number_rows" , - 1 )
28
+ self .display_metadata = config .get ("display_metadata" , False )
25
29
26
30
def get_read_schema (self ):
27
31
# In this example, we don't specify a schema here, so DSS will infer the schema
@@ -30,29 +34,37 @@ def get_read_schema(self):
30
34
31
35
def generate_rows (self , dataset_schema = None , dataset_partitioning = None ,
32
36
partition_id = None , records_limit = - 1 ):
33
- is_records_limit = records_limit > 0
37
+ is_records_limit = (records_limit > 0 ) or (self .maximum_number_rows > 0 )
38
+ if self .maximum_number_rows > 0 :
39
+ records_limit = self .maximum_number_rows
34
40
record_count = 0
35
41
while self .client .has_more_data ():
36
42
json_response = self .client .paginated_api_call ()
37
- if self .extraction_key is None :
38
- # Todo: check api_response key is free and add something overwise
39
- if isinstance (json_response , list ):
40
- record_count += len (json_response )
41
- for row in json_response :
42
- yield {"api_response" : row }
43
- else :
44
- record_count += 1
45
- yield {"api_response" : json_response }
43
+ metadata = self .client .get_metadata () if self .display_metadata else None
44
+ if self .extraction_key :
45
+ data = get_value_from_path (json_response , self .extraction_path )
46
46
else :
47
- data = json_response .get (self .extraction_key , None )
48
- if data is None :
49
- raise DataikuException ("Extraction key '{}' was not found in the incoming data" .format (self .extraction_key ))
47
+ data = json_response
48
+ if isinstance (data , list ):
50
49
record_count += len (data )
51
- for result in data :
52
- yield {"api_response" : result } if self .raw_output else result
50
+ for row in data :
51
+ yield self .format_output (row , metadata )
52
+ else :
53
+ record_count += 1
54
+ yield self .format_output (data , metadata )
53
55
if is_records_limit and record_count >= records_limit :
54
56
break
55
57
58
+ def format_output (self , item , metadata = None ):
59
+ output = metadata or {}
60
+ if self .raw_output :
61
+ output .update ({
62
+ DKUConstants .API_RESPONSE_KEY : json .dumps (item )
63
+ })
64
+ else :
65
+ output .update (parse_keys_for_json (item ))
66
+ return output
67
+
56
68
def get_writer (self , dataset_schema = None , dataset_partitioning = None ,
57
69
partition_id = None ):
58
70
"""
0 commit comments