潜在客户之间的R距离
R distance between potential clients
我有一份关于公司的数据表,另一份是关于银行的数据表。我已经为每家银行和每家公司所在的城市生成了经度和纬度。我想做的是为每家公司计算该公司与各家银行之间的平均距离。
例如,假设我有以下数据集(我实际上有 400 多家银行和几千家公司):
# Sample firms: one row per firm with its postal code and city coordinates.
# Every column, including longitude and latitude, is stored as character.
Data_firm <- data.frame(
  Firm        = c("A", "B"),
  Postal_firm = c("20246", "67720"),
  Longfirm    = c("9.2", "7.8"),
  Latfirm     = c("42.6", "48.7")
)

# Sample banks: one row per bank with postal code, city coordinates and
# assets. Every column is stored as character here as well.
Data_bank <- data.frame(
  Bank        = c("AB", "AC"),
  Postal_bank = c("50670", "88290"),
  Longbank    = c("-1.2", "6.8"),
  Latbank     = c("48.7", "48.0"),
  Assets      = c("100", "200")
)
我想在 Data_firm 中添加一列,其中包含该公司与系统中所有银行之间的平均距离(我使用 Haversine 公式计算距离),再添加一列,其中包含按银行规模加权的平均距离(不过我的问题主要在第一步)。
提前致谢,
可能的解决方案:
library(tidyverse)
library(geosphere)
# Sample firms for this answer: three firms (A, B, C), each with a postal
# code and city coordinates. All columns are stored as character.
Data_firm <- data.frame(
  Firm        = c("A", "B", "C"),
  Postal_firm = c("20246", "67720", "77720"),
  Longfirm    = c("9.2", "7.8", "8.1"),
  Latfirm     = c("42.6", "48.7", "50")
)

# Sample banks: two banks (AB, AC) with postal code, city coordinates and
# assets. All columns are stored as character.
Data_bank <- data.frame(
  Bank        = c("AB", "AC"),
  Postal_bank = c("50670", "88290"),
  Longbank    = c("-1.2", "6.8"),
  Latbank     = c("48.7", "48.0"),
  Assets      = c("100", "200")
)
# There are 2 banks and 3 firms, so pairing them up yields 6 firm-bank rows.
firm_bank_dist <- Data_firm %>%
  # cross_join() is the supported replacement for
  # inner_join(by = character()), deprecated since dplyr 1.1.0.
  cross_join(Data_bank) %>%
  # Coordinates are stored as character strings; convert them before use.
  mutate(across(starts_with(c("Lat", "Long")), as.numeric)) %>%
  # distHaversine() is vectorised over the rows of its (longitude, latitude)
  # inputs, so rowwise() + distm() is not needed: `dist` becomes a plain
  # numeric column and the result is not left row-grouped.
  mutate(dist = distHaversine(cbind(Longbank, Latbank),
                              cbind(Longfirm, Latfirm)))
firm_bank_dist
#> Firm Postal_firm Longfirm Latfirm Bank Postal_bank Longbank Latbank Assets
#> 1 A 20246 9.2 42.6 AB 50670 -1.2 48.7 100
#> 2 A 20246 9.2 42.6 AC 88290 6.8 48.0 200
#> 3 B 67720 7.8 48.7 AB 50670 -1.2 48.7 100
#> 4 B 67720 7.8 48.7 AC 88290 6.8 48.0 200
#> 5 C 77720 8.1 50.0 AB 50670 -1.2 48.7 100
#> 6 C 77720 8.1 50.0 AC 88290 6.8 48.0 200
#> dist
#> 1 1054789.9
#> 2 629728.5
#> 3 660855.4
#> 4 107446.8
#> 5 689276.5
#> 6 242027.5

# Collapse the pairs to one row per firm: the plain mean distance to all
# banks, and the mean weighted by bank assets. Assets is still a character
# column at this point, so it is converted where it is used.
firm_bank_dist %>%
  summarise(
    avg_dist = mean(dist),
    wavg_dist = weighted.mean(dist, as.numeric(Assets)),
    .by = Firm
  )
下面是一种使用 data.table 和 geosphere 包的方法:
library(data.table)
library(geosphere)
# Convert both inputs to data.table by reference.
setDT(Data_firm)
setDT(Data_bank)

# One row for every firm/bank combination.
ans <- CJ(firm = Data_firm$Firm, bank = Data_bank$Bank)

# Update joins: pull firm coordinates, then bank coordinates and assets,
# into the combination table.
ans[Data_firm,
    c("lon_f", "lat_f") := .(i.Longfirm, i.Latfirm),
    on = c(firm = "Firm")]
ans[Data_bank,
    c("lon_b", "lat_b", "assets") := .(i.Longbank, i.Latbank, i.Assets),
    on = c(bank = "Bank")]

# The joined columns are character; make coordinates and assets numeric.
cols <- names(ans)[grepl("lat|lon|ass", names(ans))]
ans[, (cols) := lapply(.SD, as.numeric), .SDcols = cols]

# Haversine distance of each firm/bank pair, computed for all rows at once
# from (longitude, latitude) matrices.
ans[, firm_to_bank := distHaversine(cbind(lon_f, lat_f),
                                    cbind(lon_b, lat_b))]
# firm bank lon_f lat_f lon_b lat_b assets firm_to_bank
# 1: A AB 9.2 42.6 -1.2 48.7 100 1054789.9
# 2: A AC 9.2 42.6 6.8 48.0 200 629728.5
# 3: B AB 7.8 48.7 -1.2 48.7 100 660855.4
# 4: B AC 7.8 48.7 6.8 48.0 200 107446.8

# Per firm: plain mean distance to the banks and asset-weighted mean
# distance, both added as columns repeated on each of the firm's rows.
ans[, `:=`(
  avg_dist_to_bank = mean(firm_to_bank),
  wavg_dist_to_bank = weighted.mean(firm_to_bank, assets)
), by = firm]
# firm bank lon_f lat_f lon_b lat_b assets firm_to_bank avg_dist_to_bank wavg_dist_to_bank
# 1: A AB 9.2 42.6 -1.2 48.7 100 1054789.9 842259.2 771415.6
# 2: A AC 9.2 42.6 6.8 48.0 200 629728.5 842259.2 771415.6
# 3: B AB 7.8 48.7 -1.2 48.7 100 660855.4 384151.1 291916.3
# 4: B AC 7.8 48.7 6.8 48.0 200 107446.8 384151.1 291916.3
我有一份关于公司的数据表,另一份是关于银行的数据表。我已经为每家银行和每家公司所在的城市生成了经度和纬度。我想做的是为每家公司计算该公司与各家银行之间的平均距离。
例如,假设我有以下数据集(我实际上有 400 多家银行和几千家公司):
# Sample firms: one row per firm with its postal code and city coordinates.
# Every column, including longitude and latitude, is stored as character.
Data_firm <- data.frame(
  Firm        = c("A", "B"),
  Postal_firm = c("20246", "67720"),
  Longfirm    = c("9.2", "7.8"),
  Latfirm     = c("42.6", "48.7")
)

# Sample banks: one row per bank with postal code, city coordinates and
# assets. Every column is stored as character here as well.
Data_bank <- data.frame(
  Bank        = c("AB", "AC"),
  Postal_bank = c("50670", "88290"),
  Longbank    = c("-1.2", "6.8"),
  Latbank     = c("48.7", "48.0"),
  Assets      = c("100", "200")
)
我想在 Data_firm 中添加一列,其中包含该公司与系统中所有银行之间的平均距离(我使用 Haversine 公式计算距离),再添加一列,其中包含按银行规模加权的平均距离(不过我的问题主要在第一步)。
提前致谢,
可能的解决方案:
library(tidyverse)
library(geosphere)
# Sample firms for this answer: three firms (A, B, C), each with a postal
# code and city coordinates. All columns are stored as character.
Data_firm <- data.frame(
  Firm        = c("A", "B", "C"),
  Postal_firm = c("20246", "67720", "77720"),
  Longfirm    = c("9.2", "7.8", "8.1"),
  Latfirm     = c("42.6", "48.7", "50")
)

# Sample banks: two banks (AB, AC) with postal code, city coordinates and
# assets. All columns are stored as character.
Data_bank <- data.frame(
  Bank        = c("AB", "AC"),
  Postal_bank = c("50670", "88290"),
  Longbank    = c("-1.2", "6.8"),
  Latbank     = c("48.7", "48.0"),
  Assets      = c("100", "200")
)
# There are 2 banks and 3 firms, so pairing them up yields 6 firm-bank rows.
firm_bank_dist <- Data_firm %>%
  # cross_join() is the supported replacement for
  # inner_join(by = character()), deprecated since dplyr 1.1.0.
  cross_join(Data_bank) %>%
  # Coordinates are stored as character strings; convert them before use.
  mutate(across(starts_with(c("Lat", "Long")), as.numeric)) %>%
  # distHaversine() is vectorised over the rows of its (longitude, latitude)
  # inputs, so rowwise() + distm() is not needed: `dist` becomes a plain
  # numeric column and the result is not left row-grouped.
  mutate(dist = distHaversine(cbind(Longbank, Latbank),
                              cbind(Longfirm, Latfirm)))
firm_bank_dist
#> Firm Postal_firm Longfirm Latfirm Bank Postal_bank Longbank Latbank Assets
#> 1 A 20246 9.2 42.6 AB 50670 -1.2 48.7 100
#> 2 A 20246 9.2 42.6 AC 88290 6.8 48.0 200
#> 3 B 67720 7.8 48.7 AB 50670 -1.2 48.7 100
#> 4 B 67720 7.8 48.7 AC 88290 6.8 48.0 200
#> 5 C 77720 8.1 50.0 AB 50670 -1.2 48.7 100
#> 6 C 77720 8.1 50.0 AC 88290 6.8 48.0 200
#> dist
#> 1 1054789.9
#> 2 629728.5
#> 3 660855.4
#> 4 107446.8
#> 5 689276.5
#> 6 242027.5

# Collapse the pairs to one row per firm: the plain mean distance to all
# banks, and the mean weighted by bank assets. Assets is still a character
# column at this point, so it is converted where it is used.
firm_bank_dist %>%
  summarise(
    avg_dist = mean(dist),
    wavg_dist = weighted.mean(dist, as.numeric(Assets)),
    .by = Firm
  )
下面是一种使用 data.table 和 geosphere 包的方法:
library(data.table)
library(geosphere)
# Convert both inputs to data.table by reference.
setDT(Data_firm)
setDT(Data_bank)

# One row for every firm/bank combination.
ans <- CJ(firm = Data_firm$Firm, bank = Data_bank$Bank)

# Update joins: pull firm coordinates, then bank coordinates and assets,
# into the combination table.
ans[Data_firm,
    c("lon_f", "lat_f") := .(i.Longfirm, i.Latfirm),
    on = c(firm = "Firm")]
ans[Data_bank,
    c("lon_b", "lat_b", "assets") := .(i.Longbank, i.Latbank, i.Assets),
    on = c(bank = "Bank")]

# The joined columns are character; make coordinates and assets numeric.
cols <- names(ans)[grepl("lat|lon|ass", names(ans))]
ans[, (cols) := lapply(.SD, as.numeric), .SDcols = cols]

# Haversine distance of each firm/bank pair, computed for all rows at once
# from (longitude, latitude) matrices.
ans[, firm_to_bank := distHaversine(cbind(lon_f, lat_f),
                                    cbind(lon_b, lat_b))]
# firm bank lon_f lat_f lon_b lat_b assets firm_to_bank
# 1: A AB 9.2 42.6 -1.2 48.7 100 1054789.9
# 2: A AC 9.2 42.6 6.8 48.0 200 629728.5
# 3: B AB 7.8 48.7 -1.2 48.7 100 660855.4
# 4: B AC 7.8 48.7 6.8 48.0 200 107446.8

# Per firm: plain mean distance to the banks and asset-weighted mean
# distance, both added as columns repeated on each of the firm's rows.
ans[, `:=`(
  avg_dist_to_bank = mean(firm_to_bank),
  wavg_dist_to_bank = weighted.mean(firm_to_bank, assets)
), by = firm]
# firm bank lon_f lat_f lon_b lat_b assets firm_to_bank avg_dist_to_bank wavg_dist_to_bank
# 1: A AB 9.2 42.6 -1.2 48.7 100 1054789.9 842259.2 771415.6
# 2: A AC 9.2 42.6 6.8 48.0 200 629728.5 842259.2 771415.6
# 3: B AB 7.8 48.7 -1.2 48.7 100 660855.4 384151.1 291916.3
# 4: B AC 7.8 48.7 6.8 48.0 200 107446.8 384151.1 291916.3