Read a SAS file in Python
Global Variables:
{
  "v_filename": "sas1234",
  "v_file": "sas1234.sas7bdat",
  "v_path": "/path_smptp/",
  "v_file_date": "202301"
}
Scenario (Python code):
# Dataiku scenario script: run the "MIgration_FILES" scenario once for every
# SAS file found in the source directory.
#
# For each *.sas7bdat file the script publishes four project variables
# (v_filename, v_file, v_path, v_file_date) and then runs the scenario,
# blocking until that run returns before handling the next file.
import os
import dataiku
import sys
import time
import glob


def _split_name_and_date(file_name):
    """Split ``<name>_<date>.<ext>`` into ``(name, date)``.

    The date is the text after the last underscore of the file name with
    its extension removed. Only that trailing token is stripped from the
    name, so a name that contains the same token earlier is left intact
    (e.g. ``a_2023_b_2023.sas7bdat`` -> ``('a_2023_b', '2023')``).

    NOTE(review): when the name contains no underscore (e.g.
    ``sas1234.sas7bdat``) the whole stem is returned as the date too,
    matching what the script has always done -- confirm this is wanted.
    """
    # splitext drops only the final extension, so dots inside the name
    # no longer truncate it.
    stem = os.path.splitext(file_name)[0]
    name, separator, date = stem.rpartition('_')
    if not separator:
        return stem.strip(), stem
    return name.strip(), date


client = dataiku.api_client()
project = client.get_project('MIGRATION')
scenario = project.get_scenario("MIgration_FILES")

filepath = '/smtp_path/'
# glob is given a relative pattern so that v_file holds the bare file name.
os.chdir(filepath)

# Sorted so that files are processed in a reproducible order.
for file_name in sorted(glob.glob('*.sas7bdat')):
    base_name, file_date = _split_name_and_date(file_name)

    # Re-read the variables on every pass so that values changed elsewhere
    # in the meantime are not overwritten with a stale copy.
    project_variables = project.get_variables()
    standard = project_variables['standard']
    standard['v_filename'] = base_name
    standard['v_file'] = file_name
    standard['v_path'] = filepath
    standard['v_file_date'] = file_date
    project.set_variables(project_variables)

    scenario.run_and_wait()
Python recipe code:
# Dataiku Python recipe: load the SAS file named by the custom variables
# "v_path" and "v_file" and write it to the "Migration_sas_files" dataset.
import dataiku
import pandas as pd
import numpy as np
from dataiku import pandasutils as pdu
import os
import glob


def _decode_if_bytes(value):
    """Return ``value`` decoded as UTF-8 when it is ``bytes``, else unchanged."""
    return value.decode('utf-8') if isinstance(value, bytes) else value


# Fetch the variables once and build the path with os.path.join, so a
# trailing slash in v_path does not produce a doubled separator.
custom_variables = dataiku.get_custom_variables()
sas_path = os.path.join(custom_variables["v_path"], custom_variables["v_file"])
df = pd.read_sas(sas_path, encoding='unicode_escape')

# Any values in object columns that are still raw bytes after the read are
# decoded to text; every other value is left as it is.
for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = df[col].apply(_decode_if_bytes)

# Write the recipe output (the original note says this goes to HDFS).
Migration_sas_files = dataiku.Dataset("Migration_sas_files")
Migration_sas_files.write_with_schema(df)
Comments
Post a Comment