旧文重发,原文刊载于2006年6月8日。
近日工作需要,要大批量导出数据,开始用SQL*Plus的SPOOL功能。结果老是提示“xrealloc: cannot reallocate XXX bytes”。而且每次发生的时间还不同,有时候是到文件1G就出错,有时候是2G多出错。怀疑是32位Linux问题,换到Solaris 8上还是如故。客户端都是9201的,到Metalink上搜索没有结果。郁闷之余到Oracle-l上发了个邮件,收到一位高手的指点,说可能是内存泄露问题,SQL*Plus的开发人员估计也不会想到有人会导出如此大量数据。在邮件往复之间,那位高手提醒可以用UTL_FILE来导出。忽然想起Tom的两本书里都有提到用PL/SQL来导出,于是翻出书里的那个网址http://asktom.oracle.com/~tkyte/flat/index.html,进去一看,PL/SQL的效率比SQL*Plus要高,缺点是生成的文件必须在主机上。SQL*Plus可以调整arraysize来提高效率(当然经我实践大数据量时还有内存泄露之虞)。Pro*C是高效的方法,缺点是需要写程序、编译。
硬着头皮看Pro*C咯,好在Tom给了原始代码,抓下来放到一台Linux的机器上proc报错,应该是proc配置include库的问题,找到一个makefile,倒是没啥问题了,但是最后链接失败,报无法识别’sqlca’。到网上一搜,和我一样晕的人还有,要修改代码,多include进,但是看Tom的帖子似乎没提到这个问题,不知道为什么。
下面是修改后的源代码,我顺便在里面修改了会话的一些参数,以处理中文和时间格式:
#include
#include
#include
#include
#define MAX_VNAME_LEN 30
#define MAX_INAME_LEN 30
static char * USERID = NULL;
static char * SQLSTMT = NULL;
static char * ARRAY_SIZE = "10";
#define vstrcpy( a, b ) \
(strcpy( a.arr, b ), a.len = strlen( a.arr ), a.arr)
EXEC SQL INCLUDE SQLCA;
EXEC SQL INCLUDE sqlda;
extern SQLDA *sqlald();
extern void sqlclu();
static void die( char * msg )
{
fprintf( stderr, "%s\n", msg );
exit(1);
}
/*
this array contains a default mapping
I am using to constrain the
lengths of returned columns. It is mapping,
for example, the Oracle
NUMBER type (type code = 2) to be 45 characters
long in a string.
*/
static int lengths[] =
{ -1, 0, 45, 0, 0, 0, 0, 0, 2000, 0, 0,
18, 25, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 512, 2000 };
static void process_parms( argc, argv )
int argc;
char * argv[];
{
int i;
for( i = 1; i < argc; i++ )
{
if ( !strncmp( argv[i], "userid=", 7 ) )
USERID = argv[i]+7;
else
if ( !strncmp( argv[i], "sqlstmt=", 8 ) )
SQLSTMT = argv[i]+8;
else
if ( !strncmp( argv[i], "arraysize=", 10 ) )
ARRAY_SIZE = argv[i]+10;
else
{
fprintf( stderr,
"usage: %s %s %s\n",
argv[0],
"userid=xxx/xxx sqlstmt=query ",
"arraysize=\n" );
exit(1);
}
}
if ( USERID == NULL || SQLSTMT == NULL )
{
fprintf( stderr,
"usage: %s %s %s\n",
argv[0],
"userid=xxx/xxx sqlstmt=query ",
"arraysize=\n" );
exit(1);
}
}
static void sqlerror_hard()
{
EXEC SQL WHENEVER SQLERROR CONTINUE;
fprintf(stderr,"\nORACLE error detected:");
fprintf(stderr,"\n% .70s \n", sqlca.sqlerrm.sqlerrmc);
EXEC SQL ROLLBACK WORK RELEASE;
exit(1);
}
static SQLDA * process_1(char * sqlstmt, int array_size )
{
SQLDA * select_dp;
int i;
int j;
int null_ok;
int precision;
int scale;
int size = 10;
fprintf( stderr, "Unloading '%s'\n", sqlstmt );
fprintf( stderr, "Array size = %d\n", array_size );
EXEC SQL WHENEVER SQLERROR DO sqlerror_hard();
EXEC SQL PREPARE S FROM :sqlstmt;
EXEC SQL DECLARE C CURSOR FOR S;
if ((select_dp = sqlald(size,MAX_VNAME_LEN,MAX_INAME_LEN))
== NULL )
die( "Cannot allocate memory for select descriptor." );
select_dp->N = size;
EXEC SQL DESCRIBE SELECT LIST FOR S INTO select_dp;
if ( !select_dp->F ) return NULL;
if (select_dp->F < 0) { size = -select_dp->F;
sqlclu( select_dp );
if ((select_dp =
sqlald (size, MAX_VNAME_LEN, MAX_INAME_LEN))
== NULL )
die( "Cannot allocate memory for descriptor." );
EXEC SQL DESCRIBE SELECT LIST FOR S INTO select_dp;
}
select_dp->N = select_dp->F;
for (i = 0; i < select_dp->N; i++)
select_dp->I[i] = (short *) malloc(sizeof(short) *
array_size );
for (i = 0; i < select_dp->F; i++)
{
sqlnul (&(select_dp->T[i]),
&(select_dp->T[i]), &null_ok);
if ( select_dp->T[i] < sizeof(lengths)/sizeof(lengths[0]) ) { if ( lengths[select_dp->T[i]] )
select_dp->L[i] = lengths[select_dp->T[i]];
else select_dp->L[i] += 5;
}
else select_dp->L[i] += 5;
select_dp->T[i] = 5;
select_dp->V[i] = (char *)malloc( select_dp->L[i] *
array_size );
for( j = MAX_VNAME_LEN-1;
j > 0 && select_dp->S[i][j] == ' ';
j--);
fprintf (stderr,
"%s%.*s", i?",":"", j+1, select_dp->S[i]);
}
fprintf( stderr, "\n" );
EXEC SQL OPEN C;
return select_dp;
}
static void process_2( SQLDA * select_dp, int array_size )
{
int last_fetch_count;
int row_count = 0;
short ind_value;
char * char_ptr;
int i,
j;
for ( last_fetch_count = 0;
;
last_fetch_count = sqlca.sqlerrd[2] )
{
EXEC SQL FOR :array_size FETCH C
USING DESCRIPTOR select_dp;
for( j=0; j < sqlca.sqlerrd[2]-last_fetch_count; j++ )
{
for (i = 0; i < select_dp->F; i++)
{
ind_value = *(select_dp->I[i]+j);
char_ptr = select_dp->V[i] +
(j*select_dp->L[i]);
printf( "%s%s", i?",":"",
ind_value?"(null)":char_ptr );
}
row_count++;
printf( "\n" );
}
if ( sqlca.sqlcode > 0 ) break;
}
sqlclu(select_dp);
EXEC SQL CLOSE C;
EXEC SQL COMMIT WORK;
fprintf( stderr, "%d rows extracted\n", row_count );
}
main( argc, argv )
int argc;
char * argv[];
{
EXEC SQL BEGIN DECLARE SECTION;
VARCHAR oracleid[50];
EXEC SQL END DECLARE SECTION;
SQLDA * select_dp;
process_parms( argc, argv );
/* Connect to ORACLE. */
vstrcpy( oracleid, USERID );
EXEC SQL WHENEVER SQLERROR DO sqlerror_hard();
EXEC SQL CONNECT racleid;
fprintf(stderr, "\nConnected to ORACLE as user: %s\n\n",
oracleid.arr);
EXEC SQL ALTER SESSION
SET NLS_DATE_FORMAT = 'YYYYMMDDHH24:MI:SS';
EXEC SQL ALTER SESSION
SET NLS_LANGUAGE = "SIMPLIFIED CHINESE";
select_dp = process_1( SQLSTMT, atoi(ARRAY_SIZE) );
process_2( select_dp , atoi(ARRAY_SIZE));
/* Disconnect from ORACLE. */
EXEC SQL COMMIT WORK RELEASE;
exit(0);
}
顺便附上我的makefile,在RHEL AS3上用的:
CC=gcc
LD=ld
OBJ=obj/
##########数据库编译选项###########
PROCFLAGS=unsafe_null=yes dynamic=ansi mode=oracle dbms=V8 parse=full\
release_cursor=no sqlcheck=SEMANTICS ireclen=512 include=./.\
sys_include=/usr/include sys_include=/usr/lib/gcc-lib/i386-redhat-linux/3.2.3/include \
def_sqlcode=yes
INCL=-I/usr/include -I./. -I${ORACLE_HOME}/precomp/public
BIN_LIBS= -lc -L${ORACLE_HOME}/lib -lclntsh
##########编译规则################
.SUFFIXES: .pc .o .c
.c.o:
${CC} -g -c $*.c ${INCL}
.pc.o:
proc $(PROCFLAGS) iname=$*.pc
$(CC) -g -c $*.c ${INCL}
rm -f $*.c $*.lis
all:unload
unload:unload.o
rm -f $@
${CC} -o $@ unload.o \
$(BIN_LIBS)
#########################################################
#编译公用目标文件
#########################################################
########清理目标文件###########################
clean:
rm -f *.o *.lis
编译以后生成unload程序,执行如下:
unload userid=XXX/XXX sqlstmt='select * from dual' arraysize=100>XXX.DAT
这样DAT文件里就是数据了。
经我测试,在数据库主机上,30分钟导出了7GB的文件,强啊!效率上可以调整的一个是arraysize参数,这个一般100-200都可以,更大的除非你IO超强,不然也没有意义;另一个是所谓并行,其实这个C程序也就是执行SELECT语句,因此将原表改为PARALLEL就可以充分利用系统资源了^_^。
转载自:http://www.readern.com/export-oracle-table-to-flat-text-in-proc.html