I have a list, "hhvrs", of length 2. Each element is a named numeric vector, and the two elements are named after the years "1920" and "1929".
我有一个长度为2的列表“hhvrs”。它的每个元素都是带名称的数值向量,这两个元素分别以年份“1920”和“1929”命名。
$`1920`
Nykvarn - 147 - 211920 Nykvarn - 262 - 211920 ...
1.235629 1.013191 ...
$`1929`
Långed - 125 - 11929 Långed - 126 - 11929 ...
1.316499 1.026785 ...
I also have a data.frame, "data", covering two years, 1920 and 1929 (see the dput at the bottom of this post).
I want to negatively match names, i.e. exclude the names that are present in the list above. In other words, I want to keep only those rows of my data frame whose name in the last column, uniquezCorrectCG, is not present in the list above. I then want to calculate efficiencies for each company, leaving out the names present in the list.
我还有一个data.frame“data”,包含1920年和1929年两个年份(请参阅本文底部的dput)。我想对名称进行否定匹配(即排除上面列表中出现的名称)。换句话说,我只想保留数据框中最后一列uniquezCorrectCG的名称不在上面列表中的那些行。然后,我想在不包含列表中名称的情况下计算每个公司的效率。
Here is my code:
这是我的代码:
# For every distinct year in `data`: keep the rows whose uniquezCorrectCG is
# not among the names stored in hhvrs for that year, then run dea() on the
# remaining rows and return the efficiencies (e$eff).
hhvrsu=lapply(unique(data$year),function(x){
library(Benchmarking)
# Rows of `data` belonging to the current year x.
datat=data[data$year==x,]
# NOTE(review): x is handed to `[[` exactly as it comes from
# unique(data$year). The elements of hhvrs are named with the strings "1920"
# and "1929"; a non-character x is not looked up by name, so this is not the
# same as hhvrs[["1929"]]. Wrapping x in as.character() gives a name lookup.
datat2=datat[!(datat$uniquezCorrectCG %in% names(hhvrs[[x]])),]
# Output: the single column Ouput_ton, row-labelled from columns 5 and 4.
y <- datat2[,"Ouput_ton",drop=FALSE]
rownames(y)=paste(datat2[,5],"-",datat2[,4])
# Inputs: four input columns bound into a matrix.
# NOTE(review): this overwrites the function argument x (the year) with the
# input matrix; the year is no longer available after this line.
x=with(datat2,
cbind(Labour_input_1000_hour,
Capital_input_1000_sek,
Electric_input_Mwh,
Rawmaterial_input_M3))
# Row labels here use columns 5, 4 and 3 (one more field than rownames(y)).
rownames(x)=paste(datat2[,5],"-",datat2[,4],"-",datat2[,3])
e <- dea(x,y,RTS="vrs")
return(e$eff) }
)
# Label each list element with its year.
names(hhvrsu)=unique(data$year)
But that fails. For example, Långed - 125 - 11929 (year 1929) is still present in the output of my code,
although it should have been dropped, because Långed - 125 - 11929
is present in the list above...
但这样做失败了。例如,Långed - 125 - 11929(1929年)仍然出现在我的代码的输出中,而它本应被删除,因为Långed - 125 - 11929存在于上面的列表中......
# Show the first entries of the 1929 element of the lapply() result.
head(hhvrsu[["1929"]])
Billingsfors - 123 - 11929 Billingsfors - 124 - 11929 Långed - 125 - 11929 Långed - 126 - 11929 Långed - 127 - 11929
0.9975506 1.0000000 1.0000000 1.0000000 1.0000000
Hånsfors - 183 - 21929
0.9928677
But it does work if I do it manually:
但如果我手动操作,它是有效的:
# Manual run for 1929 only; hhvrs is looked up with the string "1929".
datat <- data[data$year == 1929, ]
listed_1929 <- names(hhvrs[["1929"]])
datat2 <- datat[!(datat$uniquezCorrectCG %in% listed_1929), ]

# Single output column, row-labelled from columns 5 and 4 of datat2.
y <- datat2[, "Ouput_ton", drop = FALSE]
rownames(y) <- paste(datat2[, 5], "-", datat2[, 4])

# Input matrix, row-labelled from columns 5, 4 and 3 of datat2.
x <- with(
  datat2,
  cbind(
    Labour_input_1000_hour,
    Capital_input_1000_sek,
    Electric_input_Mwh,
    Rawmaterial_input_M3
  )
)
rownames(x) <- paste(datat2[, 5], "-", datat2[, 4], "-", datat2[, 3])

# Run dea() with RTS = "vrs" and show the first efficiencies.
e <- dea(x, y, RTS = "vrs")
head(e$eff)
Billingsfors - 123 - 11929 Billingsfors - 124 - 11929 Hånsfors - 183 - 21929 Hällefors - 237 - 21929 Grycksbo - 350 - 21929
0.9984071 1.0000000 1.0000000 0.5863832 0.9813024
Brättne - 100 - 31929
0.9915349
In e$eff above, Långed - 125 - 11929
has been dropped!
在上面的 e$eff 中,Långed - 125 - 11929 已被删除!
EDIT
:
编辑:
It works if I put as.character(x)
in place of plain x when indexing hhvrs, as below:
如果在下面索引 hhvrs 时用 as.character(x) 代替单纯的 x,它就可以工作:
# Load the DEA package once, rather than on every call of the per-year function.
library(Benchmarking)

# For every distinct year in `data`: drop the rows whose uniquezCorrectCG is
# listed in hhvrs for that year, run dea() on the remaining rows and keep the
# efficiencies. The result is a list with one named numeric vector per year.
years <- unique(data$year)
hhvrsu <- lapply(years, function(x) {
  # hhvrs is keyed by the year as a string ("1920", "1929"), so the year must
  # be converted with as.character(x) before it is used inside `[[`;
  # otherwise `[[` does not look the element up by name.
  year_key <- as.character(x)

  datat <- data[data$year == x, ]
  datat2 <- datat[!(datat$uniquezCorrectCG %in% names(hhvrs[[year_key]])), ]

  # Output: the single column Ouput_ton. Columns 5 and 4 are plant_name and
  # plant_code in the dput of `data` shown at the bottom of the post.
  y <- datat2[, "Ouput_ton", drop = FALSE]
  rownames(y) <- paste(datat2[, 5], "-", datat2[, 4])

  # Inputs: kept in their own variable so the year argument x is not
  # overwritten. Column 3 is year_cg_code in the same dput.
  inputs <- with(
    datat2,
    cbind(
      Labour_input_1000_hour,
      Capital_input_1000_sek,
      Electric_input_Mwh,
      Rawmaterial_input_M3
    )
  )
  rownames(inputs) <- paste(
    datat2[, 5], "-", datat2[, 4], "-", datat2[, 3]
  )

  dea(inputs, y, RTS = "vrs")$eff
})
names(hhvrsu) <- years
Any suggestions?
有什么建议么?
Dputs:
Dputs:
dput(hhvrs)
structure(list(`1920` = structure(c(1.23562876282578, 1.01319073788091,
1.55783496400001, 1.06191988898698, 1.12744927131341, 1.08504615635299,
1.25725741409574, 2.03370195312046, 1.00667697472372, 1.00260726981462,
1.3050604346423, 1.3594555255334, 1.55671945006842, 1.0072581093466,
1.65164991096899, 2.47385616808447, 1.18471196771314, 1.24186522915967,
1.65133103063843, Inf, 1.16498198151401, 1.07017484481922), .Names = c("Nykvarn - 147 - 211920",
"Nykvarn - 262 - 211920", "Tumba - 68 - 381920", "Byske - 294 - 451920",
"Långed - 127 - 571920", "Väja - 270 - 691920", "Ljusfors - 141 - 731920",
"Skärblacka - 370 - 731920", "Sätra - 152 - 781920", "Krokfors - 129 - 871920",
"Åsen - 207 - 1011920", "Åsen - 208 - 1011920", "Lagerfors - 225 - 10121920",
"Lindefors - 243 - 10281920", "Munksjö - 253 - 10281920", "Qvill - 211 - 10431920",
"Esseltewell - 375 - 10521920", "Esseltewell - 376 - 10521920",
"Ulriksfors - 205 - 10541920", "Sellnäs - 352 - 10541920", "Vivstavarv - 314 - 10751920",
"Älvsborg - 369 - 10791920")), `1929` = structure(c(1.31649939189229,
1.02678542256861, 1.50667886828221, 1.06101596031178, 1.00477142430659,
Inf, 1.00038550231904, 1.10347307305662, 1.53782048667181, 1.80890790261425,
1.06103833744605, 1.00036736526695, 1.01053736983199, 1.01119078294682,
1.00295000872313, 1.01778128036389, 1.22049428994262, 1.15078822074877,
1.00346763843347, 1.2192497185324, 1.03195112444193, 1.71491513543284,
1.00168840525869, 1.00575972592046, 1.105483053952, 1.00427057272637,
1.94482017228275, 1.00388363163126), .Names = c("Långed - 125 - 11929",
"Långed - 126 - 11929", "Långed - 127 - 11929", "Hällefors - 234 - 21929",
"Göteborg-Dals - 156 - 91929", "Papyrus - 280 - 231929", "Sofiehem - 330 - 271929",
"Tollare - 66 - 361929", "Tumba - 68 - 381929", "Alstermo - 4 - 491929",
"Billerud - 106 - 571929", "Fengersfors - 135 - 711929", "Gamlestaden - 153 - 821929",
"Gransholm - 228 - 851929", "Åsen - 207 - 1011929", "Nykvarn - 262 - 1101929",
"Haga - 24 - 10041929", "Ljusne - 218 - 10181929", "Husum - 232 - 10251929",
"Munksjö - 253 - 10281929", "Pauliström - 239 - 10311929", "Qvill - 211 - 10431929",
"Esseltewell - 375 - 10521929", "Ställdalen - 356 - 10531929",
"Kvarnsveden - 343 - 10541929", "Skutskär - 345 - 10541929",
"Sellnäs - 352 - 10541929", "Vivstavarv - 314 - 10751929"))), .Names = c("1920",
"1929"))
Dput data.frame
输入data.frame
dput( data[data$year==1929,][1:5,])
structure(list(company_code = c(1L, 1L, 1L, 1L, 1L), company_name = c("AB Billingsfors-Långed",
"AB Billingsfors-Långed", "AB Billingsfors-Långed", "AB Billingsfors-Långed",
"AB Billingsfors-Långed"), year_cg_code = c(11929L, 11929L, 11929L,
11929L, 11929L), plant_code = 123:127, plant_name = c("Billingsfors",
"Billingsfors", "Långed", "Långed", "Långed"), plant_location = c("Billingsfors",
"Billingsfors", "Dals Långed", "Dals Långed", "Dals Långed"),
plant_location_by_municipal = c("Bengtsfors", "Bengtsfors",
"Bengtsfors", "Bengtsfors", "Bengtsfors"), year = c(1929L,
1929L, 1929L, 1929L, 1929L), Output_value_1000_sek = c(720L,
2304L, 531L, 3040L, 2079L), Labour_cost_1000_sek = c(102L,
348L, 93L, 199L, 225L), Capital_cost_1000_sek = c(108L, 468L,
126L, 304L, 180L), Electricity_cost_1000_sek = c(130L, 90L,
10L, 120L, 40L), Raw_material_cost_1000_sek = c(174L, 744L,
177L, 1824L, 1080L), Output_price_1_sek.ton = c(220L, 220L,
220L, 220L, 220L), Output_price__sek.ton = c(196L, 196L,
196L, 196L, 196L), Labour_price_sek.hour = c(1, 1.208333333,
2.657142857, 1.093406593, 2.083333333), Capital_price_interest.rate = c(4.556666667,
4.556666667, 4.556666667, 4.556666667, 4.556666667), Motive_Power_pricekr.MwH = c(43.10344828,
67.61833208, 31.54574132, 93.45794393, 45.14672686), Electricity_price_kr.MwH = c(24.34456929,
24.19354839, 13.88888889, 25.26315789, 22.22222222), Raw_Material_price_kr.m3 = c(14.5,
15.5, 11.8, 19, 12), Mean_raw.material_price = c(14.3, 14.3,
14.3, 14.3, 14.3), Output_capacity_ton = c(6000L, 12000L,
3000L, 9500L, 9000L), Ouput_ton = c(3272L, 10472L, 2413L,
13818L, 9450L), Labour_input_1000_hour = c(102L, 288L, 35L,
182L, 108L), Capital_input_1000_sek = c(2853L, 1975L, 219L,
2634L, 878L), Motive_Power_Mwh = c(3016L, 1331L, 317L, 1284L,
886L), Electric_input_Mwh = c(5340, 3720, 720, 4750, 1800
), Rawmaterial_input_M3 = c(12000, 48000, 15000, 96000, 90000
), Capacity_Utilization = c(54.53333333, 87.26666667, 80.43333333,
145.4526316, 105), Labour_cost_share = c(14.16666667, 15.10416667,
17.51412429, 6.546052632, 10.82251082), Capital_cost_share = c(15,
20.3125, 23.72881356, 10, 8.658008658), Electricity_cost_share = c(18.05555556,
3.90625, 1.883239171, 3.947368421, 1.924001924), Raw_Material_cost_share = c(24.16666667,
32.29166667, 33.33333333, 60, 51.94805195), Labour_productivity = c(1.433165382,
1.624502304, 3.080154233, 3.392008925, 3.909230144), Capital_productivity = c(4.8,
22.1, 45.8, 21.9, 44.8), Power_productivity = c(0.24, 1.73,
1.68, 2.37, 2.35), Electricity_productivity = c(0.303469526,
1.39421497, 1.659846295, 1.440769899, 2.60017364), Raw.material.productivity = c(1.439189112,
1.151527229, 0.849086388, 0.759730866, 0.554210966), uniquezCorrect = c("Billingsfors - 123",
"Billingsfors - 124", "Långed - 125", "Långed - 126", "Långed - 127"
), uniquezCorrectCG = c("Billingsfors - 123 - 11929", "Billingsfors - 124 - 11929",
"Långed - 125 - 11929", "Långed - 126 - 11929", "Långed - 127 - 11929"
)), .Names = c("company_code", "company_name", "year_cg_code",
"plant_code", "plant_name", "plant_location", "plant_location_by_municipal",
"year", "Output_value_1000_sek", "Labour_cost_1000_sek", "Capital_cost_1000_sek",
"Electricity_cost_1000_sek", "Raw_material_cost_1000_sek", "Output_price_1_sek.ton",
"Output_price__sek.ton", "Labour_price_sek.hour", "Capital_price_interest.rate",
"Motive_Power_pricekr.MwH", "Electricity_price_kr.MwH", "Raw_Material_price_kr.m3",
"Mean_raw.material_price", "Output_capacity_ton", "Ouput_ton",
"Labour_input_1000_hour", "Capital_input_1000_sek", "Motive_Power_Mwh",
"Electric_input_Mwh", "Rawmaterial_input_M3", "Capacity_Utilization",
"Labour_cost_share", "Capital_cost_share", "Electricity_cost_share",
"Raw_Material_cost_share", "Labour_productivity", "Capital_productivity",
"Power_productivity", "Electricity_productivity", "Raw.material.productivity",
"uniquezCorrect", "uniquezCorrectCG"), row.names = 6:10, class = "data.frame")
1 个解决方案
#1
1
I'd do it a bit differently (not using lapply
at all). First, I'd use stack
to construct a data.frame
from hhvrs,
as follows:
我会用稍微不同的方法来做(完全不使用lapply)。首先,我会使用 stack 从 hhvrs 构造一个 data.frame,如下所示:
# Flatten hhvrs into a lookup table with one row per (year, name) pair.
# `ind` holds the list element names; the inner vector names are read from the
# row names of the stacked data.frame.
stacked <- stack(hhvrs)
my.df <- data.frame(
  year = stacked[["ind"]],
  uniquezCorrectCG = rownames(stacked)
)
Now keep those entries whose year
and uniquezCorrectCG
are present in data
but not in my.df:
现在保留那些 year 和 uniquezCorrectCG 存在于 data 中但不存在于 my.df 中的条目:
# Put the lookup rows first, then the (year, uniquezCorrectCG) pairs of data.
# duplicated() flags every row that already occurred earlier in that table;
# dropping the first nrow(my.df) flags leaves one flag per row of data.
key_table <- rbind(my.df, data[, c("year", "uniquezCorrectCG")])
already_seen <- duplicated(key_table)[-seq_len(nrow(my.df))]
data[!already_seen, ]
#1
1
I'd do it a bit differently (not using lapply
at all). First, I'd use stack
to construct a data.frame
from hhvrs,
as follows:
我会用稍微不同的方法来做(完全不使用lapply)。首先,我会使用 stack 从 hhvrs 构造一个 data.frame,如下所示:
# Flatten hhvrs into a lookup table with one row per (year, name) pair.
# `ind` holds the list element names; the inner vector names are read from the
# row names of the stacked data.frame.
stacked <- stack(hhvrs)
my.df <- data.frame(
  year = stacked[["ind"]],
  uniquezCorrectCG = rownames(stacked)
)
Now keep those entries whose year
and uniquezCorrectCG
are present in data
but not in my.df:
现在保留那些 year 和 uniquezCorrectCG 存在于 data 中但不存在于 my.df 中的条目:
# Put the lookup rows first, then the (year, uniquezCorrectCG) pairs of data.
# duplicated() flags every row that already occurred earlier in that table;
# dropping the first nrow(my.df) flags leaves one flag per row of data.
key_table <- rbind(my.df, data[, c("year", "uniquezCorrectCG")])
already_seen <- duplicated(key_table)[-seq_len(nrow(my.df))]
data[!already_seen, ]