从字符串中提取匹配的单词

时间:2022-09-13 00:22:52

I have a database structure - abbreviated version below

我有一个数据库结构 - 下面的缩写版本

# Sample data: a data frame with a single character column, sex1, and six rows.
structure(list(sex1 = c("totalmaleglobal", "totalfemaleglobal", 
"totalglobal", "totalfemaleGSK", "totalfemaleglobal", 
"totalfemaleUN")), .Names = "sex1", row.names = c(NA, 6L),
class="data.frame")

I want to extract the words 'total', 'totalmale', 'totalfemale'

我想提取'total'、'totalmale'、'totalfemale'这几个词

How do I do this?

怎么办?

I tried regex with the following code

我用以下代码尝试了正则表达式

# The three prefixes to extract, one per variable.
pattern1=c("total")
pattern2=c("totalmale")
pattern3=c("totalfemale")

# NOTE(review): every assignment below writes to the same column, daly$sex,
# so only the result of the last call (pattern3) is kept; the results for
# pattern1 and pattern2 are overwritten.
# NOTE(review): `daly` and str_extract() are not defined in this snippet --
# presumably `daly` is the data frame shown above and str_extract() comes
# from stringr; confirm.
daly$sex <- str_extract(daly$sex1,pattern1)
daly$sex <- str_extract(daly$sex1,pattern2)
daly$sex <- str_extract(daly$sex1,pattern3)

But it's giving me NA.

但它给了我NA。

4 个解决方案

#1


2  

Maybe

也许

library(stringr)
# Look up every workspace variable whose name matches the regex "pattern\\d+"
# (pattern1, pattern2, ...), fetch their values with mget(), reverse the
# order, and join them with "|" into one alternation that str_extract()
# applies to each value of daly$sex1.
# NOTE(review): rev() presumably places the longer alternatives ahead of
# "total" so that they are tried first; this relies on the order in which
# ls() returns the names -- confirm before adding more pattern variables.
daly$sex <- str_extract(daly$sex1,paste(rev(mget(ls(pattern = "pattern\\d+"))), collapse="|"))
daly
# Printed result:
#                sex1         sex
# 1   totalmaleglobal   totalmale
# 2 totalfemaleglobal totalfemale
# 3       totalglobal       total
# 4    totalfemaleGSK totalfemale
# 5 totalfemaleglobal totalfemale
# 6     totalfemaleUN totalfemale

#2


2  

Two steps with gsub,

gsub的两个步骤,

# Step 1: delete every occurrence of the wanted words (the alternatives in
# v1) from each value; v2 holds whatever text is left over.
# v1 is defined after this snippet; d1 is presumably the question's data
# frame -- confirm.
v2 <- gsub(pattern = paste(v1, collapse = "|"), replacement = "", x = d1$sex1)

# Step 2: delete that leftover text from the original values, which leaves
# only the wanted words.
gsub(pattern = paste(v2, collapse = "|"), replacement = "", x = d1$sex1)
#[1] "totalmale"   "totalfemale" "total"       "totalfemale" "totalfemale" "totalfemale"

where

其中

# The words to extract from each value.
v1 <- c("total", "totalmale", "totalfemale")

#3


1  

try this:

尝试这个:

# Sample data: a one-column data frame of labels to search.
test <- structure(
  list(
    sex1 = c(
      "totalmaleglobal", "totalfemaleglobal", "totalglobal",
      "totalfemaleGSK", "totalfemaleglobal", "totalfemaleUN"
    )
  ),
  names = "sex1",
  row.names = c(NA, 6L),
  class = "data.frame"
)

# grep(value = TRUE) returns the matching elements themselves (the full
# strings), not only the part that matched the pattern.
total <- grep("total", test[["sex1"]], value = TRUE, perl = TRUE)
totalmale <- grep("totalmale", test[["sex1"]], value = TRUE, perl = TRUE)
totalfemale <- grep("totalfemale", test[["sex1"]], value = TRUE, perl = TRUE)

# Show the elements matched by each pattern.
print(total)
print(totalmale)
print(totalfemale)

#4


0  

We could also do sapply and grepl (in base R) over the wanted patterns (s1 vector) as follows:

我们也可以在想要的模式(s1向量)上进行sapply和grepl(在基础R中),如下所示:

# Build a logical matrix: one column per pattern in s1 (columns are named
# after the patterns), one row per value of d1$sex1.
# s1 is defined after this snippet; d1 is presumably the question's data
# frame -- confirm.
x <- sapply(s1, grepl, x = d1$sex1)
# For each row, take the name of the first column holding the row maximum,
# i.e. the first pattern that matched when at least one did.
colnames(x)[max.col(x, ties.method = "first")]

# [1] "totalmale" "totalfemale" "total" "totalfemale" "totalfemale" "totalfemale"

where

其中

# Order matters: the snippet above picks the first matching column
# (ties.method = "first"), and "total" is a substring of the other two
# patterns, so the more specific patterns are listed before it.
s1 <- c("totalmale", "totalfemale", "total")

#1


2  

Maybe

也许

library(stringr)
# Look up every workspace variable whose name matches the regex "pattern\\d+"
# (pattern1, pattern2, ...), fetch their values with mget(), reverse the
# order, and join them with "|" into one alternation that str_extract()
# applies to each value of daly$sex1.
# NOTE(review): rev() presumably places the longer alternatives ahead of
# "total" so that they are tried first; this relies on the order in which
# ls() returns the names -- confirm before adding more pattern variables.
daly$sex <- str_extract(daly$sex1,paste(rev(mget(ls(pattern = "pattern\\d+"))), collapse="|"))
daly
# Printed result:
#                sex1         sex
# 1   totalmaleglobal   totalmale
# 2 totalfemaleglobal totalfemale
# 3       totalglobal       total
# 4    totalfemaleGSK totalfemale
# 5 totalfemaleglobal totalfemale
# 6     totalfemaleUN totalfemale

#2


2  

Two steps with gsub,

gsub的两个步骤,

# Step 1: delete every occurrence of the wanted words (the alternatives in
# v1) from each value; v2 holds whatever text is left over.
# v1 is defined after this snippet; d1 is presumably the question's data
# frame -- confirm.
v2 <- gsub(pattern = paste(v1, collapse = "|"), replacement = "", x = d1$sex1)

# Step 2: delete that leftover text from the original values, which leaves
# only the wanted words.
gsub(pattern = paste(v2, collapse = "|"), replacement = "", x = d1$sex1)
#[1] "totalmale"   "totalfemale" "total"       "totalfemale" "totalfemale" "totalfemale"

where

其中

# The words to extract from each value.
v1 <- c("total", "totalmale", "totalfemale")

#3


1  

try this:

尝试这个:

# Sample data: a one-column data frame of labels to search.
test <- structure(
  list(
    sex1 = c(
      "totalmaleglobal", "totalfemaleglobal", "totalglobal",
      "totalfemaleGSK", "totalfemaleglobal", "totalfemaleUN"
    )
  ),
  names = "sex1",
  row.names = c(NA, 6L),
  class = "data.frame"
)

# grep(value = TRUE) returns the matching elements themselves (the full
# strings), not only the part that matched the pattern.
total <- grep("total", test[["sex1"]], value = TRUE, perl = TRUE)
totalmale <- grep("totalmale", test[["sex1"]], value = TRUE, perl = TRUE)
totalfemale <- grep("totalfemale", test[["sex1"]], value = TRUE, perl = TRUE)

# Show the elements matched by each pattern.
print(total)
print(totalmale)
print(totalfemale)

#4


0  

We could also do sapply and grepl (in base R) over the wanted patterns (s1 vector) as follows:

我们也可以在想要的模式(s1向量)上进行sapply和grepl(在基础R中),如下所示:

# Build a logical matrix: one column per pattern in s1 (columns are named
# after the patterns), one row per value of d1$sex1.
# s1 is defined after this snippet; d1 is presumably the question's data
# frame -- confirm.
x <- sapply(s1, grepl, x = d1$sex1)
# For each row, take the name of the first column holding the row maximum,
# i.e. the first pattern that matched when at least one did.
colnames(x)[max.col(x, ties.method = "first")]

# [1] "totalmale" "totalfemale" "total" "totalfemale" "totalfemale" "totalfemale"

where

其中

# Order matters: the snippet above picks the first matching column
# (ties.method = "first"), and "total" is a substring of the other two
# patterns, so the more specific patterns are listed before it.
s1 <- c("totalmale", "totalfemale", "total")