I developed a script that parses text files and, when specific conditions are met, replaces text and generates a new file.
我开发了一个解析文本的脚本,在特定条件下它替换文本并生成新文件。
I would like to know whether I can optimize it, or whether there are any alternative suggestions.
我想知道我是否可以优化它或任何替代建议
# Build "$RECON_PATH/final.txt": one comma-separated line per record read from
# the *inputparams.txt files in the current directory.
# Output columns: fields 1-5 and 8 of the "|"-separated record (with the codes
# in fields 1-3 replaced by names), then the package column, then the two
# values taken from the first inner "[a,b]" list (second value first).
# The intermediate files tx_id.txt, package and msisdn_payment are still
# written under $RECON_PATH.

# Abort instead of redirecting to "/" when RECON_PATH is unset or empty.
: "${RECON_PATH:?RECON_PATH must be set}"

# The "./" prefix on the glob keeps a file name that contains "=" or starts
# with "-" from being read by awk as an assignment or an option.

# Step 1: translate the codes in fields 1-3. Rule order matters: field 3 is
# decided from the still-numeric field 2, and field 2 from field 1.
awk -F'|' '
  BEGIN { OFS = "," }

  # Field 3: name selected by the (field 2, field 3) code pair. Once $3 holds
  # text, the remaining numeric tests on it cannot match.
  $2 == 1 && $3 == 1  { $3 = "Subscription Creation without Previous" }
  $2 == 1 && $3 == 5  { $3 = "Offer Upgrade" }
  $2 == 1 && $3 == 6  { $3 = "Offer Downgrade" }
  $2 == 1 && $3 == 7  { $3 = "Campaign Extend" }
  $2 == 1 && $3 == 8  { $3 = "Campaign Change" }
  $2 == 1 && $3 == 27 { $3 = "Subscription Update" }
  $2 == 2 && $3 == 2  { $3 = "Charging Renewal" }
  $2 == 2 && $3 == 3  { $3 = "Subscription Reactivation" }
  $2 == 2 && $3 == 4  { $3 = "Subscription Reactivation with Recharge Monitoring" }
  $2 == 2 && $3 == 8  { $3 = "Campaign Change" }
  $2 == 2 && $3 == 30 { $3 = "Limited Service" }
  $2 == 3 && $3 == 11 { $3 = "Cancellation" }
  $2 == 3 && $3 == 17 { $3 = "Subscriber Account Reactivation" }
  $2 == 4 && $3 == 11 { $3 = "Cancellation" }
  $2 == 5 && $3 == 11 { $3 = "Cancellation" }
  $2 == 5 && $3 == 12 { $3 = "Expiration" }
  $2 == 5 && $3 == 13 { $3 = "Inactivation due to Charging" }
  $2 == 5 && $3 == 14 { $3 = "Inactivation due to Ceased Account" }
  $2 == 5 && $3 == 15 { $3 = "Inactivation due to Payment Method Change" }
  $2 == 5 && $3 == 16 { $3 = "Inactivation due to Ownership Change" }
  $2 == 5 && $3 == 18 { $3 = "Inactivation due to Offer Upgrade" }
  $2 == 5 && $3 == 19 { $3 = "Inactivation due to Offer Downgrade" }
  $2 == 6 && $3 == 9  { $3 = "Campaign Schedule" }
  $2 == 6 && $3 == 10 { $3 = "Offer Schedule" }

  # Field 2: name selected by the (field 1, field 2) code pair.
  $1 == 5 && $2 == 2 { $2 = "RENEWAL" }
  $1 == 4 && $2 == 2 { $2 = "SUBS. CREATE RENEWAL AOC" }
  $1 == 6 && $2 == 3 { $2 = "REFUND" }
  $1 == 4 && $2 == 5 { $2 = "INACTIVATION" }
  $1 == 5 && $2 == 4 { $2 = "PENALTY" }

  # Field 1: codes 1-3 get a name, any other value is kept.
  $1 == 1 { $1 = "RESERVE" }
  $1 == 2 { $1 = "COMMIT" }
  $1 == 3 { $1 = "ROLLBACK" }

  # These two stay last and in this order: the specific pair first, then the
  # catch-all for every remaining record whose field 2 is still 1.
  $1 == 5 && $2 == 1 { $2 = "SUBS. CREATE DIRECT DEBIT" }
  $2 == 1            { $2 = "CREATION" }

  { print $1, $2, $3, $4, $5, $8 }
' ./*inputparams.txt > "$RECON_PATH/tx_id.txt"

# Step 2: take the text between the third "[" and the next "]" (a two-value
# list), name the second value when it is 1 or 2, and print it before the
# first value.
awk -F'[' '
  {
    split($4, head, "]")
    split(head[1], part, ",")
    if (part[2] == 1)      label = "POSTPAID"
    else if (part[2] == 2) label = "PREPAID"
    else                   label = part[2]
    print label "," part[1]
  }
' ./*inputparams.txt > "$RECON_PATH/msisdn_payment"

# Step 3: the third comma-separated value of the whole line.
awk -F',' '{ print $3 }' ./*inputparams.txt > "$RECON_PATH/package"

# Step 4: glue the three per-record files together line by line.
paste -d',' \
  "$RECON_PATH/tx_id.txt" \
  "$RECON_PATH/package" \
  "$RECON_PATH/msisdn_payment" > "$RECON_PATH/final.txt"
The following is a sample record:
以下是样本记录
5|2|3|rfe-29883066|9840311190936312183|2.0|49.0|20131119093631|[[],4900671,SOCIAL_DATA,null,SOCIAL DATA,20130710000000,,,[971508592346,2],null,7012183,20130926190549,[[{LIMITED_PERIOD_END_DATE=20131110093613}{INITIAL_CHARGED_AMOUNT=49.0}{INITIAL_CHARGE_OPTION=1}{ENE_EVENT_ID=24645862}{FULFILL_ON_RESERVE=0}],false,false,null,4900672,SOCIAL DATA,20130710000000,2,20131119093631,0,2,[111111111111,2],USSD,2592000000,[{[{PACKAGE_ID_ONE_TIME_NORMAL_QOS=2000002935}{PREFERRED_PACKAGE_ID=PACKAGE_ID_AUTO_RN}{PACKAGE_ID_ONE_TIME_WITH_THROTTLING=2000002935}{PACKAGE_ID_AUTO_RN=2000002881}{PROVISIONED_PACK_ID=2000002935}{PROVISIONED_PACK_TYPE=PACKAGE_ID_ONE_TIME_WITH_THROTTLING}],DATAN_SOCIAL_DATA,4900667,DATAN,DATAN,[]}{[{PACKAGE_ID_ONE_TIME_NORMAL_QOS=2000002922}{PREFERRED_PACKAGE_ID=PACKAGE_ID_AUTO_RN}{PACKAGE_ID_ONE_TIME_WITH_THROTTLING=2000002922}{PACKAGE_ID_AUTO_RN=2000002880}{PROVISIONED_PACK_ID=2000002922}{PROVISIONED_PACK_TYPE=PACKAGE_ID_ONE_TIME_WITH_THROTTLING}],DATAN_DATA_SOCIAL,4900669,DATAN,DATAN,[]}{[{PACKAGE_ID_ONE_TIME_NORMAL_QOS=2000003031}{PREFERRED_PACKAGE_ID=PACKAGE_ID_AUTO_RN}{PACKAGE_ID_ONE_TIME_WITH_THROTTLING=2000003031}{PACKAGE_ID_AUTO_RN=2000003030}],DATAN_SOCIAL_THROTT,5400425,DATAN,DATAN,[]}{[{RATE_PLAN_ID=629120}],MKTWSSOCIALDATA,4900665,CMN,CMN,[]}],2,null,6912967,20130926190549]]
Regards
2 个解决方案
#1
0
You could try
你可以试试
awk -F"|" -f parse.awk *inputparams.txt
where parse.awk
is:
其中 parse.awk 的内容是：
# parse.awk -- turn "|"-separated records into comma-separated report lines.
# Usage: awk -F"|" -f parse.awk *inputparams.txt
# Output columns: f1,f2,f3,f4,f5,f8,package,payment,first-list-value
# where f1-f3 are fields 1-3 with known codes replaced by names.

BEGIN {
    OFS = ","
}

# One output line per input record.
{
    getFields()
    pack = getPackage()
    msi = getMsi()
    print f1, f2, f3, f4, f5, f8, pack, msi
}

# Return the third comma-separated value of the whole record.
function getPackage(    d) {
    split($0, d, ",")
    return d[3]
}

# Return "<payment>,<value>" built from the two-element list that follows the
# third "[" of the record. Payment code 1 becomes POSTPAID and 2 becomes
# PREPAID; any other code is passed through unchanged, so nothing from an
# earlier record can leak into this one.
function getMsi(    a, b, c, label) {
    split($0, a, "[")
    split(a[4], b, "]")
    split(b[1], c, ",")
    if (c[2] == 1)      label = "POSTPAID"
    else if (c[2] == 2) label = "PREPAID"
    else                label = c[2]
    return label "," c[1]
}

# Copy fields 1-5 and 8 into the globals f1-f5 and f8, replacing the codes in
# f1-f3 by names. Fields themselves are not modified, so every test below sees
# the original codes.
function getFields() {
    f1 = $1; f2 = $2; f3 = $3; f4 = $4; f5 = $5; f8 = $8

    # f3: chosen by the (field 2, field 3) pair; unknown pairs keep the code.
    if ($2 == 1) {
        if      ($3 == 1)  f3 = "Subscription Creation without Previous"
        else if ($3 == 5)  f3 = "Offer Upgrade"
        else if ($3 == 6)  f3 = "Offer Downgrade"
        else if ($3 == 7)  f3 = "Campaign Extend"
        else if ($3 == 8)  f3 = "Campaign Change"
        else if ($3 == 27) f3 = "Subscription Update"
    } else if ($2 == 2) {
        if      ($3 == 2)  f3 = "Charging Renewal"
        else if ($3 == 3)  f3 = "Subscription Reactivation"
        else if ($3 == 4)  f3 = "Subscription Reactivation with Recharge Monitoring"
        else if ($3 == 8)  f3 = "Campaign Change"
        else if ($3 == 30) f3 = "Limited Service"
    } else if ($2 == 3) {
        if      ($3 == 11) f3 = "Cancellation"
        else if ($3 == 17) f3 = "Subscriber Account Reactivation"
    } else if ($2 == 4) {
        if ($3 == 11) f3 = "Cancellation"
    } else if ($2 == 5) {
        if      ($3 == 11) f3 = "Cancellation"
        else if ($3 == 12) f3 = "Expiration"
        else if ($3 == 13) f3 = "Inactivation due to Charging"
        else if ($3 == 14) f3 = "Inactivation due to Ceased Account"
        else if ($3 == 15) f3 = "Inactivation due to Payment Method Change"
        else if ($3 == 16) f3 = "Inactivation due to Ownership Change"
        else if ($3 == 18) f3 = "Inactivation due to Offer Upgrade"
        else if ($3 == 19) f3 = "Inactivation due to Offer Downgrade"
    } else if ($2 == 6) {
        if      ($3 == 9)  f3 = "Campaign Schedule"
        else if ($3 == 10) f3 = "Offer Schedule"
    }

    # f2: chosen by the (field 1, field 2) pair. The generic "CREATION" is a
    # fallback only: it must not overwrite the more specific
    # "SUBS. CREATE DIRECT DEBIT" chosen for field 1 == 5.
    if      ($1 == 5 && $2 == 2) f2 = "RENEWAL"
    else if ($1 == 4 && $2 == 2) f2 = "SUBS. CREATE RENEWAL AOC"
    else if ($1 == 6 && $2 == 3) f2 = "REFUND"
    else if ($1 == 4 && $2 == 5) f2 = "INACTIVATION"
    else if ($1 == 5 && $2 == 4) f2 = "PENALTY"
    else if ($1 == 5 && $2 == 1) f2 = "SUBS. CREATE DIRECT DEBIT"
    else if ($2 == 1)            f2 = "CREATION"

    # f1: codes 1-3 get a name, anything else is kept.
    if      ($1 == 1) f1 = "RESERVE"
    else if ($1 == 2) f1 = "COMMIT"
    else if ($1 == 3) f1 = "ROLLBACK"
}
#2
1
Here's what I was thinking. Put the script into a file and make it executable so it can be run like:
这就是我的想法。将脚本放入文件并使其可执行,以便它可以像以下一样运行:
script.awk *inputparams.txt
The script: #!/usr/bin/awk -f
脚本：#!/usr/bin/awk -f
# Table-driven translation of "|"-separated records into comma-separated
# report lines. Every replacement string lives in arr[], keyed by
# "<fieldA>==<valueA> <fieldB>==<valueB>"; the pseudo pair "0==0" marks an
# entry that depends on one field only.
BEGIN {
    FS = "|"
    OFS = ","
    # $3 field strings, keyed by the values of $2 and $3
    arr[ "2==1 3==1" ] = "Subscription Creation without Previous"
    arr[ "2==1 3==5" ] = "Offer Upgrade"
    arr[ "2==1 3==6" ] = "Offer Downgrade"
    arr[ "2==1 3==7" ] = "Campaign Extend"
    arr[ "2==1 3==8" ] = "Campaign Change"
    arr[ "2==1 3==27" ] = "Subscription Update"
    arr[ "2==2 3==2" ] = "Charging Renewal"
    arr[ "2==2 3==3" ] = "Subscription Reactivation"
    arr[ "2==2 3==4" ] = "Subscription Reactivation with Recharge Monitoring"
    arr[ "2==2 3==8" ] = "Campaign Change"
    arr[ "2==2 3==30" ] = "Limited Service"
    arr[ "2==3 3==11" ] = "Cancellation"
    arr[ "2==3 3==17" ] = "Subscriber Account Reactivation"
    arr[ "2==4 3==11" ] = "Cancellation"
    arr[ "2==5 3==11" ] = "Cancellation"
    arr[ "2==5 3==12" ] = "Expiration"
    arr[ "2==5 3==13" ] = "Inactivation due to Charging"
    arr[ "2==5 3==14" ] = "Inactivation due to Ceased Account"
    arr[ "2==5 3==15" ] = "Inactivation due to Payment Method Change"
    arr[ "2==5 3==16" ] = "Inactivation due to Ownership Change"
    arr[ "2==5 3==18" ] = "Inactivation due to Offer Upgrade"
    arr[ "2==5 3==19" ] = "Inactivation due to Offer Downgrade"
    arr[ "2==6 3==9" ] = "Campaign Schedule"
    arr[ "2==6 3==10" ] = "Offer Schedule"
    # $2 field strings, keyed by the values of $1 and $2
    arr[ "1==5 2==2" ] = "RENEWAL"
    arr[ "1==4 2==2" ] = "SUBS. CREATE RENEWAL AOC"
    arr[ "1==6 2==3" ] = "REFUND"
    arr[ "1==4 2==5" ] = "INACTIVATION"
    arr[ "1==5 2==4" ] = "PENALTY"
    arr[ "1==5 2==1" ] = "SUBS. CREATE DIRECT DEBIT"
    # fallback for $2 == 1 with any other $1; see getStringWithDefault()
    arr[ "0==0 2==1" ] = "CREATION"
    # $1 field strings
    arr[ "0==0 1==1" ] = "RESERVE"
    arr[ "0==0 1==2" ] = "COMMIT"
    arr[ "0==0 1==3" ] = "ROLLBACK"
    # strings for the 10th comma-separated piece of $9; "9==10" is only a
    # key prefix that keeps these apart from the "$2" entries above
    arr[ "9==10 2==1" ] = "POSTPAID"
    arr[ "9==10 2==2" ] = "PREPAID"
}

# One output line per input record.
{
    print getString(0, 1), getStringWithDefault(1, 2), getString(2, 3), $4, $5, $8, field9Strings()
}

# Build "<field>==<value>". Spaces are removed from the value so that a padded
# field such as " 1" still matches its table key.
function makeShortKey(field, value) {
    gsub(/ /, "", value)
    return field "==" value
}

# Build the two-part table key "<f1>==<v1> <f2>==<v2>".
function makeLongKey(f1, v1, f2, v2) {
    return makeShortKey(f1, v1) " " makeShortKey(f2, v2)
}

# Return the table string for ($field1, $field2), or "" when there is none.
# field1 == 0 selects the single-field "0==0" entries. The "in" test avoids
# creating an empty arr[] element for every key that is not in the table.
function lookup(field1, field2,    key) {
    key = makeLongKey(field1, field1 == 0 ? 0 : $field1, field2, $field2)
    return (key in arr) ? arr[key] : ""
}

# Like getString(a, b), but when the ($a, $b) pair has no entry, fall back to
# the single-field entry for $b before giving up and returning $b itself.
function getStringWithDefault(a, b,    found) {
    found = lookup(a, b)
    if (found == "") found = lookup(0, b)
    return (found == "") ? $b : found
}

# Return the table string for ($field1, $field2); when there is none, return
# $field2 unchanged. Pass field1 = 0 for entries that depend on $field2 only.
function getString(field1, field2,    found) {
    found = lookup(field1, field2)
    return (found == "") ? $field2 : found
}

# Split $9 on commas and return pieces 3, 10 and 9 joined by OFS, with the
# leading "[" of piece 9 and the closing "]" of piece 10 removed and piece 10
# replaced by its table string when one exists. The piece positions are fixed.
function field9Strings(    carr, key, label) {
    split($9, carr, ",")
    sub(/\[/, "", carr[9])
    sub(/]/, "", carr[10])
    key = "9==10 " makeShortKey(2, carr[10])
    label = (key in arr) ? arr[key] : carr[10]
    return carr[3] OFS label OFS carr[9]
}
I've chosen to use the arr
because I think it makes the strings more maintainable. I've tried to choose keys that would make it possible to read those strings in from a strings file in another revision.
我选择使用arr,因为我认为它使字符串更易于维护。我试图选择能够从另一个版本的字符串文件中读取这些字符串的键。
The keys for field 9 parsing are "hard-coded" right now.
字段9解析的键现在是“硬编码的”。
For arr[ "0==0 2==1" ]
I ended up adding a special getStringWithDefault()
function. If "CREATION" depended on a value from column 1, the key could be updated and that function could be removed.
对于arr [“0 == 0 2 == 1”],我最终添加了一个特殊的getStringWithDefault()函数。如果“CREATION”依赖于第1列中的值,则可以更新密钥并删除该功能。
#1
0
You could try
你可以试试
awk -F"|" -f parse.awk *inputparams.txt
where parse.awk
is:
其中 parse.awk 的内容是：
# parse.awk -- turn "|"-separated records into comma-separated report lines.
# Usage: awk -F"|" -f parse.awk *inputparams.txt
# Output columns: f1,f2,f3,f4,f5,f8,package,payment,first-list-value
# where f1-f3 are fields 1-3 with known codes replaced by names.

BEGIN {
    OFS = ","
}

# One output line per input record.
{
    getFields()
    pack = getPackage()
    msi = getMsi()
    print f1, f2, f3, f4, f5, f8, pack, msi
}

# Return the third comma-separated value of the whole record.
function getPackage(    d) {
    split($0, d, ",")
    return d[3]
}

# Return "<payment>,<value>" built from the two-element list that follows the
# third "[" of the record. Payment code 1 becomes POSTPAID and 2 becomes
# PREPAID; any other code is passed through unchanged, so nothing from an
# earlier record can leak into this one.
function getMsi(    a, b, c, label) {
    split($0, a, "[")
    split(a[4], b, "]")
    split(b[1], c, ",")
    if (c[2] == 1)      label = "POSTPAID"
    else if (c[2] == 2) label = "PREPAID"
    else                label = c[2]
    return label "," c[1]
}

# Copy fields 1-5 and 8 into the globals f1-f5 and f8, replacing the codes in
# f1-f3 by names. Fields themselves are not modified, so every test below sees
# the original codes.
function getFields() {
    f1 = $1; f2 = $2; f3 = $3; f4 = $4; f5 = $5; f8 = $8

    # f3: chosen by the (field 2, field 3) pair; unknown pairs keep the code.
    if ($2 == 1) {
        if      ($3 == 1)  f3 = "Subscription Creation without Previous"
        else if ($3 == 5)  f3 = "Offer Upgrade"
        else if ($3 == 6)  f3 = "Offer Downgrade"
        else if ($3 == 7)  f3 = "Campaign Extend"
        else if ($3 == 8)  f3 = "Campaign Change"
        else if ($3 == 27) f3 = "Subscription Update"
    } else if ($2 == 2) {
        if      ($3 == 2)  f3 = "Charging Renewal"
        else if ($3 == 3)  f3 = "Subscription Reactivation"
        else if ($3 == 4)  f3 = "Subscription Reactivation with Recharge Monitoring"
        else if ($3 == 8)  f3 = "Campaign Change"
        else if ($3 == 30) f3 = "Limited Service"
    } else if ($2 == 3) {
        if      ($3 == 11) f3 = "Cancellation"
        else if ($3 == 17) f3 = "Subscriber Account Reactivation"
    } else if ($2 == 4) {
        if ($3 == 11) f3 = "Cancellation"
    } else if ($2 == 5) {
        if      ($3 == 11) f3 = "Cancellation"
        else if ($3 == 12) f3 = "Expiration"
        else if ($3 == 13) f3 = "Inactivation due to Charging"
        else if ($3 == 14) f3 = "Inactivation due to Ceased Account"
        else if ($3 == 15) f3 = "Inactivation due to Payment Method Change"
        else if ($3 == 16) f3 = "Inactivation due to Ownership Change"
        else if ($3 == 18) f3 = "Inactivation due to Offer Upgrade"
        else if ($3 == 19) f3 = "Inactivation due to Offer Downgrade"
    } else if ($2 == 6) {
        if      ($3 == 9)  f3 = "Campaign Schedule"
        else if ($3 == 10) f3 = "Offer Schedule"
    }

    # f2: chosen by the (field 1, field 2) pair. The generic "CREATION" is a
    # fallback only: it must not overwrite the more specific
    # "SUBS. CREATE DIRECT DEBIT" chosen for field 1 == 5.
    if      ($1 == 5 && $2 == 2) f2 = "RENEWAL"
    else if ($1 == 4 && $2 == 2) f2 = "SUBS. CREATE RENEWAL AOC"
    else if ($1 == 6 && $2 == 3) f2 = "REFUND"
    else if ($1 == 4 && $2 == 5) f2 = "INACTIVATION"
    else if ($1 == 5 && $2 == 4) f2 = "PENALTY"
    else if ($1 == 5 && $2 == 1) f2 = "SUBS. CREATE DIRECT DEBIT"
    else if ($2 == 1)            f2 = "CREATION"

    # f1: codes 1-3 get a name, anything else is kept.
    if      ($1 == 1) f1 = "RESERVE"
    else if ($1 == 2) f1 = "COMMIT"
    else if ($1 == 3) f1 = "ROLLBACK"
}
#2
1
Here's what I was thinking. Put the script into a file and make it executable so it can be run like:
这就是我的想法。将脚本放入文件并使其可执行,以便它可以像以下一样运行:
script.awk *inputparams.txt
The script: #!/usr/bin/awk -f
脚本：#!/usr/bin/awk -f
# Table-driven translation of "|"-separated records into comma-separated
# report lines. Every replacement string lives in arr[], keyed by
# "<fieldA>==<valueA> <fieldB>==<valueB>"; the pseudo pair "0==0" marks an
# entry that depends on one field only.
BEGIN {
    FS = "|"
    OFS = ","
    # $3 field strings, keyed by the values of $2 and $3
    arr[ "2==1 3==1" ] = "Subscription Creation without Previous"
    arr[ "2==1 3==5" ] = "Offer Upgrade"
    arr[ "2==1 3==6" ] = "Offer Downgrade"
    arr[ "2==1 3==7" ] = "Campaign Extend"
    arr[ "2==1 3==8" ] = "Campaign Change"
    arr[ "2==1 3==27" ] = "Subscription Update"
    arr[ "2==2 3==2" ] = "Charging Renewal"
    arr[ "2==2 3==3" ] = "Subscription Reactivation"
    arr[ "2==2 3==4" ] = "Subscription Reactivation with Recharge Monitoring"
    arr[ "2==2 3==8" ] = "Campaign Change"
    arr[ "2==2 3==30" ] = "Limited Service"
    arr[ "2==3 3==11" ] = "Cancellation"
    arr[ "2==3 3==17" ] = "Subscriber Account Reactivation"
    arr[ "2==4 3==11" ] = "Cancellation"
    arr[ "2==5 3==11" ] = "Cancellation"
    arr[ "2==5 3==12" ] = "Expiration"
    arr[ "2==5 3==13" ] = "Inactivation due to Charging"
    arr[ "2==5 3==14" ] = "Inactivation due to Ceased Account"
    arr[ "2==5 3==15" ] = "Inactivation due to Payment Method Change"
    arr[ "2==5 3==16" ] = "Inactivation due to Ownership Change"
    arr[ "2==5 3==18" ] = "Inactivation due to Offer Upgrade"
    arr[ "2==5 3==19" ] = "Inactivation due to Offer Downgrade"
    arr[ "2==6 3==9" ] = "Campaign Schedule"
    arr[ "2==6 3==10" ] = "Offer Schedule"
    # $2 field strings, keyed by the values of $1 and $2
    arr[ "1==5 2==2" ] = "RENEWAL"
    arr[ "1==4 2==2" ] = "SUBS. CREATE RENEWAL AOC"
    arr[ "1==6 2==3" ] = "REFUND"
    arr[ "1==4 2==5" ] = "INACTIVATION"
    arr[ "1==5 2==4" ] = "PENALTY"
    arr[ "1==5 2==1" ] = "SUBS. CREATE DIRECT DEBIT"
    # fallback for $2 == 1 with any other $1; see getStringWithDefault()
    arr[ "0==0 2==1" ] = "CREATION"
    # $1 field strings
    arr[ "0==0 1==1" ] = "RESERVE"
    arr[ "0==0 1==2" ] = "COMMIT"
    arr[ "0==0 1==3" ] = "ROLLBACK"
    # strings for the 10th comma-separated piece of $9; "9==10" is only a
    # key prefix that keeps these apart from the "$2" entries above
    arr[ "9==10 2==1" ] = "POSTPAID"
    arr[ "9==10 2==2" ] = "PREPAID"
}

# One output line per input record.
{
    print getString(0, 1), getStringWithDefault(1, 2), getString(2, 3), $4, $5, $8, field9Strings()
}

# Build "<field>==<value>". Spaces are removed from the value so that a padded
# field such as " 1" still matches its table key.
function makeShortKey(field, value) {
    gsub(/ /, "", value)
    return field "==" value
}

# Build the two-part table key "<f1>==<v1> <f2>==<v2>".
function makeLongKey(f1, v1, f2, v2) {
    return makeShortKey(f1, v1) " " makeShortKey(f2, v2)
}

# Return the table string for ($field1, $field2), or "" when there is none.
# field1 == 0 selects the single-field "0==0" entries. The "in" test avoids
# creating an empty arr[] element for every key that is not in the table.
function lookup(field1, field2,    key) {
    key = makeLongKey(field1, field1 == 0 ? 0 : $field1, field2, $field2)
    return (key in arr) ? arr[key] : ""
}

# Like getString(a, b), but when the ($a, $b) pair has no entry, fall back to
# the single-field entry for $b before giving up and returning $b itself.
function getStringWithDefault(a, b,    found) {
    found = lookup(a, b)
    if (found == "") found = lookup(0, b)
    return (found == "") ? $b : found
}

# Return the table string for ($field1, $field2); when there is none, return
# $field2 unchanged. Pass field1 = 0 for entries that depend on $field2 only.
function getString(field1, field2,    found) {
    found = lookup(field1, field2)
    return (found == "") ? $field2 : found
}

# Split $9 on commas and return pieces 3, 10 and 9 joined by OFS, with the
# leading "[" of piece 9 and the closing "]" of piece 10 removed and piece 10
# replaced by its table string when one exists. The piece positions are fixed.
function field9Strings(    carr, key, label) {
    split($9, carr, ",")
    sub(/\[/, "", carr[9])
    sub(/]/, "", carr[10])
    key = "9==10 " makeShortKey(2, carr[10])
    label = (key in arr) ? arr[key] : carr[10]
    return carr[3] OFS label OFS carr[9]
}
I've chosen to use the arr
because I think it makes the strings more maintainable. I've tried to choose keys that would make it possible to read those strings in from a strings file in another revision.
我选择使用arr,因为我认为它使字符串更易于维护。我试图选择能够从另一个版本的字符串文件中读取这些字符串的键。
The keys for field 9 parsing are "hard-coded" right now.
字段9解析的键现在是“硬编码的”。
For arr[ "0==0 2==1" ]
I ended up adding a special getStringWithDefault()
function. If "CREATION" depended on a value from column 1, the key could be updated and that function could be removed.
对于arr [“0 == 0 2 == 1”],我最终添加了一个特殊的getStringWithDefault()函数。如果“CREATION”依赖于第1列中的值,则可以更新密钥并删除该功能。