Read a SAS file in Python

Global Variables:

{
  "v_filename": "sas1234",
  "v_file": "sas1234.sas7bdat",
  "v_path": "/path_smptp/",
  "v_file_date": "202301"
}
 

Scenario (Python code):

import os
import dataiku
import sys
import time
import glob

# Driver script: for every SAS dataset in the landing folder, publish the
# file's name/date into the project variables and run the migration
# scenario once per file (the scenario's recipe reads those variables).
client = dataiku.api_client()
project = client.get_project('MIGRATION')
scenario = project.get_scenario("MIgration_FILES")
filepath = '/smtp_path/'

# Glob with the absolute path instead of os.chdir() so we don't mutate
# process-global state; keep only the bare filename for the variables.
for path in glob.glob(os.path.join(filepath, '*.sas7bdat')):
    file = os.path.basename(path)
    base = file.rsplit('.', 1)[0]  # drop the .sas7bdat extension
    # Split the trailing "_YYYYMM" partition date off the filename.
    # rsplit removes ONLY the last occurrence — str.replace('_'+partdt, '')
    # would strip every matching substring, corrupting names that repeat
    # the date token earlier in the name.
    if '_' in base:
        filenm, partdt = base.rsplit('_', 1)
    else:
        # No underscore: mirror the original fallback — both variables
        # get the whole base name.
        filenm = partdt = base

    # Publish the current file's coordinates, then run the scenario
    # synchronously so files are processed one at a time.
    project_variables = project.get_variables()
    project_variables['standard']['v_filename'] = filenm
    project_variables['standard']['v_file'] = file
    project_variables['standard']['v_path'] = filepath
    project_variables['standard']['v_file_date'] = partdt
    project.set_variables(project_variables)
    scenario.run_and_wait()



Python recipe code:


import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
import os
import glob

# Recipe: load the SAS file named by the project variables (set by the
# driving scenario) and write it out as a Dataiku dataset.

# Fetch the variables once instead of calling the API per lookup, and
# join the path properly — v_path ends with '/', so v_path + '/' + v_file
# would produce a double slash.
variables = dataiku.get_custom_variables()
sas_file = os.path.join(variables["v_path"], variables["v_file"])

# NOTE(review): 'unicode_escape' is an unusual encoding for SAS exports —
# confirm against the source system (often 'latin-1' or 'utf-8').
df = pd.read_sas(sas_file, encoding='unicode_escape')

# pd.read_sas can leave raw bytes in object columns; decode them to str
# so the output schema is clean text.
for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = df[col].apply(lambda x: x.decode('utf-8') if isinstance(x, bytes) else x)

# Write recipe output on HDFS.
Migration_sas_files = dataiku.Dataset("Migration_sas_files")
Migration_sas_files.write_with_schema(df)
 

Comments

Popular posts from this blog

Date format issue with spark sql

Hive Partition sub folders HIVE_UNION_SUBDIR_1,HIVE_UNION_SUBDIR_2,HIVE_UNION_SUBDIR_8

Dataiku and Dremio date difference