Data Loading - Oracle



data.bcp
UNIX / LINUX

vim script.sh
!/bin/sh
rm -rf data2.bcp
iconv -f ISO-8859-14 data.bcp -t UTF-8 -o data2.bcp
export NLS_LANG=AMERICAN_AMERICA.WE8ISO8859P1
sqlldr user1/pswd1@server1 control=data.ctl log=data.log skip=1

$ chmod +x script.sh
data.ctl
load data
infile 'data2.bcp'
replace
into table temp1
fields terminated by "?" OPTIONALLY ENCLOSED BY '"'
trailing nullcols
(
COL1 CHAR(200),
COL2 CHAR(200),
COL3 CHAR(200)
)
$ . script.sh
create table temp1(
COL1 varchar(200),
COL2 varchar(200),
COL3 varchar(200)
);


WINDOWS
Convert file to UTF-8
iconv -f ISO-8859-14 data.bcp -t UTF-8 -o data2.bcp

script.bat
sqlldr user1/pswd1@ database_instance1 control=data.ctl log=data.log skip=1
pause
data.ctl
load data
CHARACTERSET WE8ISO8859P1
infile 'data2.bcp'
replace
into table temp1
fields terminated by "?" OPTIONALLY ENCLOSED BY '"'
trailing nullcols
(
COL1 CHAR(200),
COL2 CHAR(200),
COL3 CHAR(200)
)


Python
from sqlalchemy import *
import csv
username = "user1"
password = "pswd1"
database_instance = " database_instance1"
conn_str = 'oracle://%s:%s@%s' % (username, password, database_instance)
engine = create_engine(conn_str)
CONN = engine.connect()
CONN.execute("Alter session enable parallel dml")
def utf_8_encoder(unicode_csv_data):
for line in unicode_csv_data:
yield line.decode('iso-8859-1').encode('utf-8')
ch = "\xbf".decode("iso-8859-1").encode('utf-8')
i = 1
with open('data.dat', 'rb') as csvfile:
spamreader = csv.reader(utf_8_encoder(csvfile))
for row in spamreader:
str1 = ''.join([word.replace('\xbf',',') for word in row])
str1 = ''.join([word.replace(ch,',') for word in row])
str2 = str1.replace("\'"," ")
str3 = ''.join("'" + str(s) + "'," for s in str2.split(",")).strip(",")
if i <> 1:
if i <> 1 and i < 1000:
query = "insert into temp values("+str3+")"
print query
CONN.execute(query)
print i
i = i + 1


Learnings
$ head -n 10 data.bcp
$ file data.bcp
data.bcp: ISO-8859 text
$ locale charmap
UTF-8
$ set encoding=ISO-8859-1
delimeter
'\xbf'
Latin1
ISO-8859-1
set encoding=ISO-8859-1
sed -i 's/¿/|/g' data.dat

//run UNIX command in python
import subprocess
from subprocess import call
import shlex
! set encoding = "ISO-8859-1"
cmd = "sed -i 's/¿/|/g' data.dat"
call(shlex.split(cmd))
ch = "\xbf".decode("iso-8859-1")
print ch
¿

Leave a Reply

Your email address will not be published. Required fields are marked *