pandas replace function (替换功能)

时间:2021-09-06 14:43:27

  1. list like replace method
  2. dict like replace method
  3. regex expression


import pandas as pd
import numpy as np
s = pd.Series([0,1,2,3,4])

s.replace(0,5)  # single value to replace
0    5
1    1
2    2
3    3
4    4
dtype: int64
df = pd.DataFrame({'A':[0,1,2,3,4],
"B":[5,6,7,8,9],
"C":['a','b','c','d','e']})
df.replace(0,5)  # replace all 0 to 5

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {
vertical-align: top;
} .dataframe thead th {
text-align: right;
}
A B C
0 5 5 a
1 1 6 b
2 2 7 c
3 3 8 d
4 4 9 e
df  # df itself is unchanged: replace() returns a new object because inplace defaults to False
# DataFrame.replace(to_replace=None, value=None, inplace=False, limit=None, regex=False, method='pad')
# to_replace can be a number, a string, a list, a dict, or a regular expression
# limit: maximum size gap to forward or backward fill.

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {
vertical-align: top;
} .dataframe thead th {
text-align: right;
}
A B C
0 0 5 a
1 1 6 b
2 2 7 c
3 3 8 d
4 4 9 e

1. list like replace method

df.replace([1,2,3,4],[4,3,2,1])  # element-wise replacement: to_replace=[1,2,3,4], value=[4,3,2,1]

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {
vertical-align: top;
} .dataframe thead th {
text-align: right;
}
A B C
0 0 5 a
1 4 6 b
2 3 7 c
3 2 8 d
4 1 9 e
df.replace([1,2,3,4],100)  # to_replace=[1,2,3,4], value=100 (every listed value becomes 100)

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {
vertical-align: top;
} .dataframe thead th {
text-align: right;
}
A B C
0 0 5 a
1 100 6 b
2 100 7 c
3 100 8 d
4 100 9 e
df.replace([1,2],method='bfill')   # like fillna with method='bfill' (backfill); the default method is 'pad'
# NOTE: the method (and limit) keywords of replace() are deprecated since pandas 2.1

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {
vertical-align: top;
} .dataframe thead th {
text-align: right;
}
A B C
0 0 5 a
1 3 6 b
2 3 7 c
3 3 8 d
4 4 9 e

2. dict like replace method

df.replace({2:20,6:100})  # to_replace=2, value=20; to_replace=6, value=100

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {
vertical-align: top;
} .dataframe thead th {
text-align: right;
}
A B C
0 0 5 a
1 1 100 b
2 20 7 c
3 3 8 d
4 4 9 e
df.replace({'A':2,'B':7},1000)  # to_replace={'A':2,"B":7}, value=1000: replace 2 in column A and 7 in column B with 1000

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {
vertical-align: top;
} .dataframe thead th {
text-align: right;
}
A B C
0 0 5 a
1 1 6 b
2 1000 1000 c
3 3 8 d
4 4 9 e
df.replace({'A':{1:1000,4:20}})   # in column A: to_replace=1, value=1000; to_replace=4, value=20

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {
vertical-align: top;
} .dataframe thead th {
text-align: right;
}
A B C
0 0 5 a
1 1000 6 b
2 2 7 c
3 3 8 d
4 20 9 e

3. regex expression

df = pd.DataFrame({'A':['bat','foot','bait'],
'B':['abc','bar','foot']})
df.replace(to_replace=r'^ba.$',value='vvvv',regex=True)  # to define to_replace and value in the function

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {
vertical-align: top;
} .dataframe thead th {
text-align: right;
}
A B
0 vvvv abc
1 foot vvvv
2 bait foot
df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True)  # in column A: to_replace=r'^ba.$', value='new'

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {
vertical-align: top;
} .dataframe thead th {
text-align: right;
}
A B
0 new abc
1 foot bar
2 bait foot
df.replace({'A':{r"^ba.$":"new"}},regex=True)  #  same as above

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {
vertical-align: top;
} .dataframe thead th {
text-align: right;
}
A B
0 new abc
1 foot bar
2 bait foot
df.replace(regex=r'^ba.$',value='vvv')  # in the whole dataframe

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {
vertical-align: top;
} .dataframe thead th {
text-align: right;
}
A B
0 vvv abc
1 foot vvv
2 bait foot
df.replace(regex={r'^ba.$':'vvv','foot':'xyz'})

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {
vertical-align: top;
} .dataframe thead th {
text-align: right;
}
A B
0 vvv abc
1 xyz vvv
2 bait xyz
df.replace(regex=[r'^ba.$','foo.$'],value='vvv')

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {
vertical-align: top;
} .dataframe thead th {
text-align: right;
}
A B
0 vvv abc
1 vvv vvv
2 bait vvv