2018.03.27 pandas duplicated 和 replace 使用

时间:2023-01-16 22:18:34
 # .duplicated / .drop_duplicates: detecting and removing repeated values
import numpy as np
import pandas as pd

s = pd.Series([1, 1, 1, 1, 1, 2, 3, 3, 3, 4, 4, 5, 6, 6])
print(s)
# duplicated() returns a boolean Series; True marks a value already seen earlier.
print(s.duplicated())
# Invert the mask to keep only the first occurrence of each value.
print(s[~s.duplicated()])

# drop_duplicates() removes the repeated values directly and returns a new Series.
# Passing inplace=True would modify s itself instead of returning a copy.
s_re = s.drop_duplicates()
print(s_re)
print('------')

# DataFrame test: here duplicated() flags a row whose values in all columns
# repeat an earlier row (row 1 repeats row 0).
df = pd.DataFrame({'key1': ['a', 'a', 3, 4, 5],
                   'key2': ['a', 'a', 'b', 'b', 'c']})
print(df)
print('---------------------')
print(df.duplicated())
print('---------------------')
print(df.drop_duplicates())

结果:

0     1
1     1
2     1
3     1
4     1
5     2
6     3
7     3
8     3
9     4
10    4
11    5
12    6
13    6
dtype: int64
0     False
1      True
2      True
3      True
4      True
5     False
6     False
7      True
8      True
9     False
10     True
11    False
12    False
13     True
dtype: bool
0     1
5     2
6     3
9     4
11    5
12    6
dtype: int64
0     1
5     2
6     3
9     4
11    5
12    6
dtype: int64
------
  key1 key2
0    a    a
1    a    a
2    3    b
3    4    b
4    5    c
---------------------
0    False
1     True
2    False
3    False
4    False
dtype: bool
---------------------
  key1 key2
0    a    a
2    3    b
3    4    b
4    5    c
# .replace(): substitute values in a Series
# (uses the numpy / pandas imports from the first listing)
s = pd.Series(list('aaabbbcdd'))
print(s)
# Scalar form: replace every 'a' with NaN.
print(s.replace('a', np.nan))
# List form: replace each listed value ('a' and 'd') with the same NaN.
print(s.replace(['a', 'd'], np.nan))
# Dict form: map each old value to its own replacement.
print(s.replace({'a': 'Hello', 'd': 'World'}))

结果:

0    a
1    a
2    a
3    b
4    b
5    b
6    c
7    d
8    d
dtype: object
0    NaN
1    NaN
2    NaN
3      b
4      b
5      b
6      c
7      d
8      d
dtype: object
0    NaN
1    NaN
2    NaN
3      b
4      b
5      b
6      c
7    NaN
8    NaN
dtype: object
0    Hello
1    Hello
2    Hello
3        b
4        b
5        b
6        c
7    World
8    World
dtype: object