I have a database structure - abbreviated version below
我有一个数据库结构 - 下面的缩写版本
structure(list(sex1 = c("totalmaleglobal", "totalfemaleglobal",
"totalglobal", "totalfemaleGSK", "totalfemaleglobal",
"totalfemaleUN")), .Names = "sex1", row.names = c(NA, 6L),
class="data.frame")
I want to extract the words 'total', 'totalmale', 'totalfemale'
我想提取'total'、'totalmale'、'totalfemale'这几个词
How do I do this?
该怎么做?
I tried regex with the following code
我用以下代码尝试了正则表达式
# Three candidate patterns; note that "total" is a prefix of the other two.
pattern1=c("total")
pattern2=c("totalmale")
pattern3=c("totalfemale")
# Each assignment below overwrites daly$sex, so only the last call (pattern3)
# determines the final column. str_extract() returns NA where the pattern does
# not occur, so every row without "totalfemale" ends up as NA.
# NOTE(review): str_extract() is presumably stringr's; no library(stringr)
# call appears in this snippet -- confirm it is loaded.
daly$sex <- str_extract(daly$sex1,pattern1)
daly$sex <- str_extract(daly$sex1,pattern2)
daly$sex <- str_extract(daly$sex1,pattern3)
But it's giving me NA.
但它给了我NA。
4 个解决方案
#1
2
Maybe
也许
library(stringr)
# Collect the question's patterns explicitly instead of scanning the workspace
# with ls(): that scan also picks up unrelated objects whose names contain
# "pattern<digit>" and orders names as text ("pattern10" before "pattern2").
patterns <- c(pattern1, pattern2, pattern3)
# Put longer alternatives first: the regex engine takes the first alternative
# that matches, so "total" must come after "totalmale" and "totalfemale".
patterns <- patterns[order(nchar(patterns), decreasing = TRUE)]
daly$sex <- str_extract(daly$sex1, paste(patterns, collapse = "|"))
daly
# sex1 sex
# 1 totalmaleglobal totalmale
# 2 totalfemaleglobal totalfemale
# 3 totalglobal total
# 4 totalfemaleGSK totalfemale
# 5 totalfemaleglobal totalfemale
# 6 totalfemaleUN totalfemale
#2
2
Two steps with gsub:
gsub的两个步骤:
# Step 1: strip the wanted words (v1) from each value, keeping what is left.
strip_words <- paste(v1, collapse = "|")
v2 <- gsub(strip_words, "", d1$sex1)
# Step 2: strip those leftovers from the original values, so that only the
# wanted word remains.
strip_rest <- paste(v2, collapse = "|")
gsub(strip_rest, "", d1$sex1)
#[1] "totalmale" "totalfemale" "total" "totalfemale" "totalfemale" "totalfemale"
where
其中
# Words stripped by the first gsub() call; define this before running the two
# gsub() lines shown above.
v1 <- c('total', 'totalmale', 'totalfemale')
#3
1
try this:
尝试这个:
# Rebuild the example data frame from the question.
test <- structure(
  list(
    sex1 = c(
      "totalmaleglobal", "totalfemaleglobal",
      "totalglobal", "totalfemaleGSK", "totalfemaleglobal",
      "totalfemaleUN"
    )
  ),
  .Names = "sex1",
  row.names = c(NA, 6L),
  class = "data.frame"
)

# grep(value = TRUE) returns the full strings that contain each pattern,
# not just the matched word.
total <- grep("total", test$sex1, perl = TRUE, value = TRUE)
totalmale <- grep("totalmale", test$sex1, perl = TRUE, value = TRUE)
totalfemale <- grep("totalfemale", test$sex1, perl = TRUE, value = TRUE)

print(total)
print(totalmale)
print(totalfemale)
#4
0
We could also use sapply and grepl (in base R) over the wanted patterns (the s1 vector) as follows:
我们也可以在想要的模式(s1向量)上进行sapply和grepl(在基础R中),如下所示:
# One logical column per pattern in s1: TRUE where that pattern occurs in sex1.
x <- sapply(s1, function(pat) grepl(pat, d1$sex1))
# For each row, take the first column that matched and report its pattern name.
first_hit <- max.col(x, ties.method = "first")
colnames(x)[first_hit]
# [1] "totalmale" "totalfemale" "total" "totalfemale" "totalfemale" "totalfemale"
where
其中
# Patterns tested by the sapply()/grepl() call shown above; define this first.
# "total" is listed last because it also matches the other two cases, and
# max.col(..., ties.method = "first") keeps the earliest matching column.
s1 <- c("totalmale", "totalfemale", "total")
#1
2
Maybe
也许
library(stringr)
# Collect the question's patterns explicitly instead of scanning the workspace
# with ls(): that scan also picks up unrelated objects whose names contain
# "pattern<digit>" and orders names as text ("pattern10" before "pattern2").
patterns <- c(pattern1, pattern2, pattern3)
# Put longer alternatives first: the regex engine takes the first alternative
# that matches, so "total" must come after "totalmale" and "totalfemale".
patterns <- patterns[order(nchar(patterns), decreasing = TRUE)]
daly$sex <- str_extract(daly$sex1, paste(patterns, collapse = "|"))
daly
# sex1 sex
# 1 totalmaleglobal totalmale
# 2 totalfemaleglobal totalfemale
# 3 totalglobal total
# 4 totalfemaleGSK totalfemale
# 5 totalfemaleglobal totalfemale
# 6 totalfemaleUN totalfemale
#2
2
Two steps with gsub:
gsub的两个步骤:
# Step 1: strip the wanted words (v1) from each value, keeping what is left.
strip_words <- paste(v1, collapse = "|")
v2 <- gsub(strip_words, "", d1$sex1)
# Step 2: strip those leftovers from the original values, so that only the
# wanted word remains.
strip_rest <- paste(v2, collapse = "|")
gsub(strip_rest, "", d1$sex1)
#[1] "totalmale" "totalfemale" "total" "totalfemale" "totalfemale" "totalfemale"
where
其中
# Words stripped by the first gsub() call; define this before running the two
# gsub() lines shown above.
v1 <- c('total', 'totalmale', 'totalfemale')
#3
1
try this:
尝试这个:
# Rebuild the example data frame from the question.
test <- structure(
  list(
    sex1 = c(
      "totalmaleglobal", "totalfemaleglobal",
      "totalglobal", "totalfemaleGSK", "totalfemaleglobal",
      "totalfemaleUN"
    )
  ),
  .Names = "sex1",
  row.names = c(NA, 6L),
  class = "data.frame"
)

# grep(value = TRUE) returns the full strings that contain each pattern,
# not just the matched word.
total <- grep("total", test$sex1, perl = TRUE, value = TRUE)
totalmale <- grep("totalmale", test$sex1, perl = TRUE, value = TRUE)
totalfemale <- grep("totalfemale", test$sex1, perl = TRUE, value = TRUE)

print(total)
print(totalmale)
print(totalfemale)
#4
0
We could also use sapply and grepl (in base R) over the wanted patterns (the s1 vector) as follows:
我们也可以在想要的模式(s1向量)上进行sapply和grepl(在基础R中),如下所示:
# One logical column per pattern in s1: TRUE where that pattern occurs in sex1.
x <- sapply(s1, function(pat) grepl(pat, d1$sex1))
# For each row, take the first column that matched and report its pattern name.
first_hit <- max.col(x, ties.method = "first")
colnames(x)[first_hit]
# [1] "totalmale" "totalfemale" "total" "totalfemale" "totalfemale" "totalfemale"
where
其中
# Patterns tested by the sapply()/grepl() call shown above; define this first.
# "total" is listed last because it also matches the other two cases, and
# max.col(..., ties.method = "first") keeps the earliest matching column.
s1 <- c("totalmale", "totalfemale", "total")