R中网络对象和Igraph对象不一致
Inconsistency between Network object and Igraph object in R
我正在使用 R 中的 SNA(statnet 套件)和 igraph 开始进行描述性网络分析。我想知道应该使用哪个套件来研究我的网络的不同属性,因为它们的功能略有不同,这使得它们不能完全互换。
不幸的是,我注意到这两个包返回了不同的结果(例如,SNA 识别的最大团(clique)大小是 8,而 igraph 达到 17!)。
我从同一个边缘列表开始构建这两个对象,存储为数据框。我可以使用 intergraph 得出相同的结论,但是 我如何定义哪个包创建了正确的网络?
编辑
有人要求我提供可重现的示例。提供数据将毫无用处,因为我无法创建它们的有意义的子样本。不幸的是,它们是私人数据,我不能传播它们。只是为了给出一个想法,下面我提供了边列表的前几行;
它在代码中名为 'fdi.edge.2003',类(class)为 'data.frame':
> dput(head(fdi.edge.2003, 10))
structure(list(iso_o = c("ARE", "ARE", "ARE", "ARE", "ARE", "ARE",
"ARE", "ARE", "ARE", "ARE"), iso_d = c("AUS", "AUT", "BGD", "BHR",
"CHE", "CHN", "EGY", "ESP", "FIN", "GBR"), year = c(2003, 2003,
2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003), all_num = c(1L,
2L, 1L, 3L, 1L, 2L, 3L, 1L, 1L, 1L), all_inv_tot = c(200, 298.6,
10, 21.1, 16.1, 30, 36.5, 5.6, 24.6, 8), all_inv_avg = c(200,
149.3, 10, 7.03333333333333, 16.1, 15, 12.1666666666667, 5.6,
24.6, 8), all_job_tot = c(438, 789, 81, 358, 18, 173, 67, 70,
40, 77), all_job_avg = c(438, 394.5, 81, 119.333335876465, 18,
86.5, 22.3333339691162, 70, 40, 77), greenf_num = c(1, 2, 0,
3, 1, 2, 3, 1, 0, 1), greenf_inv_tot = c(200, 298.6, 0, 21.1,
16.1, 30, 36.5, 5.6, 0, 8), greenf_inv_avg = c(200, 149.300003051758,
0, 7.03333330154419, 16.1000003814697, 15, 12.1666669845581,
5.59999990463257, 0, 8), greenf_job_tot = c(438, 789, 0, 358,
18, 173, 67, 70, 0, 77), greenf_job_avg = c(438, 394.5, 0, 119.333335876465, 18, 86.5, 22.3333339691162, 70, 0, 77), coloc_num = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0), coloc_inv_tot = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), coloc_inv_avg = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), coloc_job_tot = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), coloc_job_avg = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), expan_num = c(0, 0, 1, 0, 0, 0, 0, 0, 1, 0),
expan_inv_tot = c(0, 0, 10, 0, 0, 0, 0, 0, 24.6, 0), expan_inv_avg = c(0,
0, 10, 0, 0, 0, 0, 0, 24.6000003814697, 0), expan_job_tot = c(0,
0, 81, 0, 0, 0, 0, 0, 40, 0), expan_job_avg = c(0, 0, 81,
0, 0, 0, 0, 0, 40, 0), no_flow = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), grav_noinv = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0), dist = c(12092.1591796875, 4245.841796875, 3643.56079101562,
426.657623291016, 4830.7373046875, 5966.42724609375, 2370.06079101562,
5634.66748046875, 4570.3173828125, 5483.74462890625), distw = c(11543.8054501625,
4324.89105693665, 3561.5309768755, 490.192080768776, 4831.20286380844,
5975.25361008682, 2447.29938508511, 5605.06845714264, 4638.03680032616,
5594.32698249168), contig = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), comrelig = c(0.00378700019791722, 0.0094410004094243,
0.815205037593842, 0.901609003543854, 0.00625500036403537,
0.0227760020643473, 0.776296019554138, 0.00387900019995868,
0.00279700011014938, 0.0142930001020432), comlang_off = c(0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L), comlang_ethno = c(0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L), comcol = c(0L, 0L, 1L,
1L, 0L, 0L, 0L, 0L, 0L, 0L), colony = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L), comleg_pretrans = c(1L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 1L), comcur = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), tdiff = c(5.35000038146973, 3, 2, 1, 3,
4, 2, 3.5, 2, 4), conflict = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, 0L), smctry = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), subg = c(3, 3, 4, 4, 3, 4, 4, 3, 3, 3), same_reg = c(0,
0, 0, 1, 0, 0, 1, 0, 0, 0), same_inc = c(1, 1, 0, 1, 1, 0,
0, 1, 1, 1), simil_inc = c(1, 1, 0, 1, 1, 0, 0, 1, 1, 1)), datalabel = "fDIMarket data aggregati per origine-destinazione-anno", time.stamp = " 6 Dec 2017 16:07", formats = c("%9s",
"%44s", "%9s", "%44s", "%9.0g", "%9.0g", "%10.0g", "%10.0g",
"%10.0g", "%10.0g", "%8.0g", "%10.0g", "%9.0g", "%10.0g", "%9.0g",
"%8.0g", "%10.0g", "%9.0g", "%10.0g", "%9.0g", "%8.0g", "%10.0g",
"%9.0g", "%10.0g", "%9.0g", "%8.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%12.0g", "%9.0g", "%9.0g", "%9.0g",
"%12.0g", "%8.0g", "%9.0g", "%8.0g", "%8.0g", "%8.0g", "%9.0g",
"%8.0g", "%9.0g", "%9.0g", "%8.0g", "%9.0g", "%8.0g", "%8.0g",
"%9s", "%8.0g", "%8.0g", "%9.0g", "%9.0g", "%9.0g", "%8.0g",
"%8.0g", "%9.0g", "%8.0g", "%8.0g", "%8.0g", "%9s", "%9s", "%9s",
"%9s", "%8.0g", "%8.0g", "%9s", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9s", "%10.0g", "%9.0g", "%9.0g", "%9s", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9s", "%10.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%10.0g", "%10.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%40.0g",
"%40.0g", "%10.0g", "%10.0g", "%10.0g", "%10.0g", "%10.0g", "%10.0g",
"%9.0g", "%9.0g", "%8.0g", "%8.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g"), types = c(3L, 44L, 3L, 44L,
65527L, 65528L, 65526L, 65526L, 65526L, 65527L, 65526L, 65526L,
65527L, 65526L, 65527L, 65526L, 65526L, 65527L, 65526L, 65527L,
65526L, 65526L, 65527L, 65526L, 65527L, 65530L, 65527L, 65527L,
65526L, 65526L, 65526L, 65527L, 65528L, 65526L, 65526L, 65527L,
65528L, 65530L, 65527L, 65530L, 65530L, 65530L, 65530L, 65530L,
65530L, 65530L, 65530L, 65527L, 65530L, 65529L, 3L, 65530L, 65530L,
65530L, 65530L, 65530L, 65530L, 65530L, 65527L, 65530L, 65530L,
65530L, 2L, 2L, 2L, 2L, 65530L, 65530L, 2L, 65527L, 65527L, 65527L,
65527L, 65527L, 65527L, 3L, 65530L, 65527L, 65527L, 2L, 65527L,
65527L, 65527L, 65527L, 65527L, 65527L, 3L, 65530L, 65527L, 65527L,
65527L, 65526L, 65530L, 65530L, 65530L, 65529L, 65530L, 65530L,
65530L, 65530L, 65530L, 65530L, 65530L, 65526L, 65526L, 65530L,
65530L, 65526L, 65526L, 65527L, 65527L, 65530L, 65530L, 65527L,
65527L, 65527L, 65527L, 65527L, 65527L, 65527L), val.labels = structure(c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "gspflag", "gspflag", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""), .Names = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "gspflag", "gspflag", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "")), var.labels = c("iso_3",
"Parent Country (Name)", "iso_3", "Host Country (Name)", "Reference year",
"Total # of Investments by origin in destination, in reference year",
"Total value of investments by origin in destination, in reference year",
"Average value of investments flown by origin in destination, in reference year",
"Total # of jobs created by year-origin-dest", "Average # of jobs created by investments by origin in destination, in reference ",
"Greenfield - Number of Projects in reference year year", "Greenfield - Total value by origin in destination, in reference year",
"Yearly average value of greenfield projects in the channel",
"Greenfield - Total # of jobs by origin in destination, in reference year",
"Average # of Jobs created by greenfield projects", "Co-location - Number of Projects in reference year year",
"Co-location - Total value by origin in destination, in reference year",
"Yearly average value of co-location projects in the channel",
"Co-location - Total # of jobs by origin in destination, in reference year",
"Average # of Jobs created by co-location projects", "Greenfield - Number of Projects in reference year year",
"Expansion - Total value by origin in destination, in reference year",
"Yearly average value of expansion projects in the channel",
"Expansion - Total # of jobs by origin in destination, in reference year",
"Average # of Jobs created by expansion projects", "Dummy = 1 if no investment in the year-cty_pair observation were recorded",
"Only gravity information - no investments. kept for merge with migration",
"simple distance (most populated cities, km)", "weighted distance (pop-wt, km)",
"Population, total in mn", "GDP (current US$)", "GDP per cap (current US$)",
"Area in sq. kms", "Population, total in mn", "GDP (current US$)",
"GDP per cap (current US$)", "Area in sq. kms", "1=Contiguity",
"1=Common religion", "1=Common official or primary language",
"1=Language is spoken by at least 9% of the population", "1=Common colonizer post 1945",
"1=Pair ever in colonial relationship", "1=Pair in colonial relationship post 1945",
"1=Common legal origins before transition", "1=Common legal origins after transition",
"1=Common currency", "nb of hours difference between ex and im",
"1=War", "Independence date if colony=1", "hegemon if cursib==1",
"1=Origin is current or former hegemon of destination", "1=Destination is current or former hegemon of origin",
"1=Trade from heg_o to colony", "1=Trade from colony to heg_d",
"1=Pair currently in colonial relationship", "1=Origin is GATT/WTO member",
"1=Destination is GATT/WTO member", "= 1 if a BIT ever existed between o and d",
"1=RTA (Source: WTO, 2015)", "1=FTA;2=Cust. Union;3=Common Market;4=Economic union (Baier & Bergstrand, 2009)",
"1=FTA (Source: Head, Mayer and Ries, 2010)", "origin legal system before transition",
"destination legal system before transition", "origin legal system after transition",
"destination legal system after transition", "1=ACP to EU", "1=EU to ACP",
"Income Category. H=high L=Low etc", "1 if income category = H",
"1 if income category = L", "1 if income category = LM", "1 if income category = UM",
"1 if fragile state", "1 if small state", "Regional 3-char Code - WB definition",
"1 if member of OECD", "1 if country is included among least developed countries",
"1 if no information on income level / region is available for FDI parent country",
"Income Category. H=high L=Low etc", "1 if income category = H",
"1 if income category = L", "1 if income category = LM", "1 if income category = UM",
"1 if fragile state", "1 if small state", "Regional 3-char Code - WB definition",
"1 if member of OECD", "1 if country is included among least developed countries",
"1 if no information on income level / region is available for FDI host country",
"simple distance between capitals (capitals, km)", "weighted distance (pop-wt, km) CES distances with theta=-1",
"1 if countries were or are the same country", "1=Pair ever in sibling relationship",
"1=Pair currently in sibling relationship", "severance year for pairs if sibling == 1",
"1=Pair ever in sibling relationship and conflict", "1=Common legal origin changed since transition",
"1=Non-reciprocal PTA ; 2=PTA (Source: Baier & Bergstrand, 2009)",
"1=Origin is donator", "1=Destination is donator", "report of changes in Rose data",
"report of changes in Rose data", "Cost of business start-up procedures (% of GNI per capita)",
"Cost of business start-up procedures (% of GNI per capita)",
"Start-up procedures to register a business (number)", "Start-up procedures to register a business (number)",
"Time required to start a business (days)", "Time required to start a business (days)",
"Days+Procs to start a business", "Days+Procs to start a business",
"1=Origin is a EU member", "1=Destination is a EU member", "group(iso_o iso_d)",
"", "", "", "", "", ""), version = 118L, label.table = structure(list(gspflag = structure(0:3, .Names = c("no gsp recorded in Rose",
"data directly from Rose", "changes in data from Rose", "Assumption that gsp continues after 1999"))), .Names = "gspflag"), expansion.fields = list(c("no_region_o", "note1", "Following small countries have no information because not independent countries: Anguilla; Christmas Island; Cocos and Keeling Islands; Cook Islands; Falkland Islands; French Guiana; Guadeloupe; Martinique; Montserrat; Netherland Antilles; Niue; Norfolk Island; Pitcairn; Reunion; Saint Helena, Ascension and Tristan d..; Saint Pierre and Miquelon; Tokelau; Wallis and Futuna; Western Sahara. notes no_region_o: Serbia and Montenegro has no information Beacuse is treated as separated countries. see variables no_flow and grav_noinv for details"), c("_dta", "note11", "Adesione OECD da fonte OECD \"http://www.oecd.org/about/membersandpartners/list-oecd-member-countries.htm\""), c("_dta", "note10", "Classificazione per reddito e definizione fragile-small region Ufficiale World Bank - \"https://datahelpdesk.worldbank.org/knowledgebase/articles/906519-world-bank-country-and-lending-groups\""
), c("_dta", "note9", "Classificazione reddito e regioni come da WB World Development Indicators"), c("bit", "note1", "It is a dummy that states whether in a certain year there was a BIT between o and d and vice versa."), c("bit", "note0", "1"), c("_dta", "note8", "non ci sono duplicati nel dataset BIT perchè li ho eliminati. In questo modo, per ogni combinazione anno-cty_pair ho una sola osservazione. Mi serve per creare la variabile BIT nel do file principale"), c("_dta", "note7", "South sudan da problemi, è solo come destination country"), c("_dta", "note6", "I seguenti paesi hanno dati gravity ma non di investimenti (vedi variabile invYN e nota connessa). Li possiamo mantenere come cancellare. I paesi in questione sono: Anguilla - Netherland Antilles - Cocos and Keeling Islands - Cook Islands - Christmas Islands - Western Sahara - Falkland Islands - Faeroe Islands - Gibraltar French Guiana Kiribati - Marshall Islands - Northern Mariana Islands - Montserrat - Norfolk Islands - Niue - Nauru - Pitcairn - Palau - Saint Helena... - San Marino - Saint Pierre et Miquelon Tokelau - Tonga - Tuvalu - British Virgin Islands - Vanuatu - Wallis and Futuna."), c("_dta", "note5", "geo_dist database scaricato da \"http://www.cepii.fr/CEPII/en/bdd_modele/presentation.asp?id=6\" il 28 febbraio 2017. Updated 10 febbraio 2017"), c("_dta", "note4", "gravity database scaricato da \"http://www.cepii.fr/CEPII/en/bdd_modele/presentation.asp?id=8\" il 28 febbraio 2017. 
Updated 10 febbraio 2017"), c("gdp_d", "destring", "Characters removed were:"), c("pop_d","destring", "Characters removed were:"), c("gdp_o", "destring", "Characters removed were:"), c("pop_o", "destring", "Characters removed were:"), c("year", "destring", "Characters removed were:"), c("_dta","__JVarLab", "Indicator Name"), c("_dta", "ReS_i", "iso3"), c("_dta", "ReS_ver", "v.2"), c("_dta", "ReS_j", "iso_o"), c("_dta", "ReS_str", "1"), c("_dta", "ReS_Xij", "p"), c("fta_bb", "destring", "Characters removed were:"), c("_dta", "_TSitrvl", "1"), c("_dta", "_TSpanel", "ccode"),
c("_dta", "_TStvar", "year"), c("_dta", "_dta", "st"), c("_dta",
"st_id", "leadnew"), c("_dta", "st_d", "leadgone"), c("_dta",
"st_t0", "time0"), c("_dta", "st_t", "tenplus"), c("_dta",
"_lang_list", "default"), c("_dta", "_lang_c", "default"),
c("_dta", "st_bs", "1"), c("_dta", "st_s", "1"), c("_dta",
"st_o", "0"), c("_dta", "st_bd", "failure"), c("_dta", "st_bt",
"dur8099"), c("_dta", "st_ver", "2"), c("_dta", "tis", "year"
), c("_dta", "iis", "shcode56"), c("_dta", "__xi__Vars__To__Drop__",
"_Icontnt1_2 _Icontnt1_3 _Icontnt1_4 _Icontnt1_5 _Icontnt1_6 _Icontnt1_7"
), c("_dta", "__xi__Vars__Prefix__", "_I _I _I _I _I _I"),
c("no_flow", "note1", "Questa variabile mi dice se ho investimenti (=0) nell'osservazione year-cty_pair oppure se ho solo la coppia paese."
), c("no_flow", "note0", "1"), c("_dta", "note3", "codici ISO (versione 3166 da International Organisation for Standardization - \"https://www.iso.org\""
), c("_dta", "note2", "Investimenti FdiMarket from FDIIntelligence - \"http://www.fdiintelligence.com/\", FDIMarket Database"
), c("_dta", "note1", "Dataset Disaggregato a livello di singola impresa. Creato da Filippo Santi - Versione: 1 - Data: 9 Nov 2017."
), c("_dta", "note0", "11"), c("no_region_o", "note0", "2"
), c("no_region_o", "note2", "Following small countries have no information because not independent countries: Anguilla; Christmas Island; Cocos and Keeling Islands; Cook Islands; Falkland Islands; French Guiana; Guadeloupe; Martinique; Montserrat; Netherland Antilles; Niue; Norfolk Island; Pitcairn; Reunion; Saint Helena, Ascension and Tristan d..; Saint Pierre and Miquelon; Tokelau; Wallis and Futuna; Western Sahara. notes no_region_o: Serbia and Montenegro has no information Beacuse is treated as separated countries. see variables no_flow and grav_noinv for details"
), c("no_region_d", "note0", "1"), c("no_region_d", "note1",
"See notes to no_region_o")), byteorder = "LSF", .Names = c("iso_o",
"iso_d", "year", "all_num", "all_inv_tot", "all_inv_avg", "all_job_tot",
"all_job_avg", "greenf_num", "greenf_inv_tot", "greenf_inv_avg",
"greenf_job_tot", "greenf_job_avg", "coloc_num", "coloc_inv_tot",
"coloc_inv_avg", "coloc_job_tot", "coloc_job_avg", "expan_num",
"expan_inv_tot", "expan_inv_avg", "expan_job_tot", "expan_job_avg",
"no_flow", "grav_noinv", "dist", "distw", "contig", "comrelig",
"comlang_off", "comlang_ethno", "comcol", "colony", "comleg_pretrans",
"comcur", "tdiff", "conflict", "smctry", "subg", "same_reg",
"same_inc", "simil_inc"), n = 228L, row.names = c(NA, 10L), class = "data.frame")
下面是我使用的代码
# Create network (STATNET)
library(network)
library(sna)
# Build a directed network object from the edge-list data frame.
fdi.net.2003 <- network(
  fdi.edge.2003,
  matrix.type = "edgelist",
  directed = TRUE,
  ignore.eval = FALSE
)
# Computing DIAMETER and APL
# In statnet both measures are derived from the geodesic distances, so
# geodist() is run first on the 2003 network created above.
# inf.replace = NA stores NA for pairs that have no connecting path, which
# lets the na.rm = TRUE calls below skip them.
geod.2003 <- geodist(fdi.net.2003, inf.replace = NA)
# The length of the shortest path for all pairs of nodes.
geod.2003$gdist
# The number of shortest paths for all pairs of nodes.
geod.2003$counts
# Shortest Path Matrix
dist_mat <- geod.2003$gdist
hist(dist_mat)
# Diameter: the longest finite geodesic (the author reported 6).
diam <- max(dist_mat, na.rm = TRUE)
# Average path length over ordered pairs of distinct nodes. The diagonal
# (distance from a node to itself) is left out so it cannot pull the mean
# towards zero.
off_diag <- row(dist_mat) != col(dist_mat)
apl <- mean(dist_mat[off_diag], na.rm = TRUE)
# (Adapted from https://dnac.ssri.duke.edu/rlabs/2017/02_descriptive_statistics.php)
# REPLICATE WITH IGRAPH
# sna/network are detached first so their function names do not clash with
# igraph's.
detach(package:sna)
detach(package:network)
library(igraph)
# NOTE(review): the vertex set here comes from fdi.attr.2003, whereas the
# statnet object above is built from the edge list only. If fdi.attr.2003
# lists vertices that never appear in an edge, the two objects differ in
# size -- confirm with vcount()/ecount() against the network object.
fdi.graph.2003 <- graph_from_data_frame(
  fdi.edge.2003,
  directed = TRUE,
  vertices = fdi.attr.2003
)
# The author reported 7 here.
diam2 <- diameter(fdi.graph.2003, directed = TRUE, unconnected = TRUE)
# Numeric results are compared with a tolerance instead of bit-for-bit.
isTRUE(all.equal(diam, diam2)) # author reported FALSE
# Using intergraph
library(intergraph)
# Convert the statnet object itself, so both packages see the same graph.
fdi.graph.2003.int <- asIgraph(fdi.net.2003)
class(fdi.graph.2003.int) # igraph
# The author reported 6 here.
diam3 <- diameter(fdi.graph.2003.int, directed = TRUE, unconnected = TRUE)
isTRUE(all.equal(diam, diam3)) # author reported TRUE
这只是一个例子。在计算团普查(clique census)时出现了其他更相关的问题:对于 'network' 类的对象,我得到的最大团大小为 8;相反,对于 'igraph' 对象,我得到的最大团大小为 17!
知道发生了什么事吗?
我知道这来晚了,我希望你已经找到了答案! Connections——一份由国际社会网络分析网络制作的期刊——有一篇文章指出了针对相同措施的不同软件包之间的差异。您可以在这里找到它:https://www.exeley.com/connections/doi/10.21307/connections-2017-002
我是 SNA 的新手,所以我不想自以为是专家,但在尝试查找 "correct" 结果时我会谨慎行事。 Indegree 和 outdegree 很简单,无论包如何,结果都应该相同。不过,我的理解是 clique、cluster 等比较模糊,最好的衡量标准在一定程度上取决于研究问题。
仅供参考:我建议先确保基本网络结构是一致的——不同包之间的边数、入度、出度不应该有差异——然后再决定哪个包的方法与你的研究最相关。
祝你好运!
我正在使用 R 中的 SNA(statnet 套件)和 igraph 开始进行描述性网络分析。我想知道应该使用哪个套件来研究我的网络的不同属性,因为它们的功能略有不同,这使得它们不能完全互换。
不幸的是,我注意到这两个包返回了不同的结果(例如,SNA 识别的最大团(clique)大小是 8,而 igraph 达到 17!)。我从同一个边缘列表开始构建这两个对象,存储为数据框。我可以使用 intergraph 得出相同的结论,但是我如何判断哪个包创建了正确的网络?
编辑
有人要求我提供可重现的示例。提供数据将毫无用处,因为我无法创建它们的有意义的子样本。不幸的是,它们是私人数据,我不能传播它们。只是为了给出一个想法,下面我提供了边列表的前几行;
它在代码中名为 'fdi.edge.2003',类(class)为 'data.frame':
> dput(head(fdi.edge.2003, 10))
structure(list(iso_o = c("ARE", "ARE", "ARE", "ARE", "ARE", "ARE",
"ARE", "ARE", "ARE", "ARE"), iso_d = c("AUS", "AUT", "BGD", "BHR",
"CHE", "CHN", "EGY", "ESP", "FIN", "GBR"), year = c(2003, 2003,
2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003), all_num = c(1L,
2L, 1L, 3L, 1L, 2L, 3L, 1L, 1L, 1L), all_inv_tot = c(200, 298.6,
10, 21.1, 16.1, 30, 36.5, 5.6, 24.6, 8), all_inv_avg = c(200,
149.3, 10, 7.03333333333333, 16.1, 15, 12.1666666666667, 5.6,
24.6, 8), all_job_tot = c(438, 789, 81, 358, 18, 173, 67, 70,
40, 77), all_job_avg = c(438, 394.5, 81, 119.333335876465, 18,
86.5, 22.3333339691162, 70, 40, 77), greenf_num = c(1, 2, 0,
3, 1, 2, 3, 1, 0, 1), greenf_inv_tot = c(200, 298.6, 0, 21.1,
16.1, 30, 36.5, 5.6, 0, 8), greenf_inv_avg = c(200, 149.300003051758,
0, 7.03333330154419, 16.1000003814697, 15, 12.1666669845581,
5.59999990463257, 0, 8), greenf_job_tot = c(438, 789, 0, 358,
18, 173, 67, 70, 0, 77), greenf_job_avg = c(438, 394.5, 0, 119.333335876465, 18, 86.5, 22.3333339691162, 70, 0, 77), coloc_num = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0), coloc_inv_tot = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), coloc_inv_avg = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), coloc_job_tot = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), coloc_job_avg = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), expan_num = c(0, 0, 1, 0, 0, 0, 0, 0, 1, 0),
expan_inv_tot = c(0, 0, 10, 0, 0, 0, 0, 0, 24.6, 0), expan_inv_avg = c(0,
0, 10, 0, 0, 0, 0, 0, 24.6000003814697, 0), expan_job_tot = c(0,
0, 81, 0, 0, 0, 0, 0, 40, 0), expan_job_avg = c(0, 0, 81,
0, 0, 0, 0, 0, 40, 0), no_flow = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), grav_noinv = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0), dist = c(12092.1591796875, 4245.841796875, 3643.56079101562,
426.657623291016, 4830.7373046875, 5966.42724609375, 2370.06079101562,
5634.66748046875, 4570.3173828125, 5483.74462890625), distw = c(11543.8054501625,
4324.89105693665, 3561.5309768755, 490.192080768776, 4831.20286380844,
5975.25361008682, 2447.29938508511, 5605.06845714264, 4638.03680032616,
5594.32698249168), contig = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), comrelig = c(0.00378700019791722, 0.0094410004094243,
0.815205037593842, 0.901609003543854, 0.00625500036403537,
0.0227760020643473, 0.776296019554138, 0.00387900019995868,
0.00279700011014938, 0.0142930001020432), comlang_off = c(0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L), comlang_ethno = c(0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L), comcol = c(0L, 0L, 1L,
1L, 0L, 0L, 0L, 0L, 0L, 0L), colony = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L), comleg_pretrans = c(1L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 1L), comcur = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), tdiff = c(5.35000038146973, 3, 2, 1, 3,
4, 2, 3.5, 2, 4), conflict = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, 0L), smctry = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), subg = c(3, 3, 4, 4, 3, 4, 4, 3, 3, 3), same_reg = c(0,
0, 0, 1, 0, 0, 1, 0, 0, 0), same_inc = c(1, 1, 0, 1, 1, 0,
0, 1, 1, 1), simil_inc = c(1, 1, 0, 1, 1, 0, 0, 1, 1, 1)), datalabel = "fDIMarket data aggregati per origine-destinazione-anno", time.stamp = " 6 Dec 2017 16:07", formats = c("%9s",
"%44s", "%9s", "%44s", "%9.0g", "%9.0g", "%10.0g", "%10.0g",
"%10.0g", "%10.0g", "%8.0g", "%10.0g", "%9.0g", "%10.0g", "%9.0g",
"%8.0g", "%10.0g", "%9.0g", "%10.0g", "%9.0g", "%8.0g", "%10.0g",
"%9.0g", "%10.0g", "%9.0g", "%8.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%12.0g", "%9.0g", "%9.0g", "%9.0g",
"%12.0g", "%8.0g", "%9.0g", "%8.0g", "%8.0g", "%8.0g", "%9.0g",
"%8.0g", "%9.0g", "%9.0g", "%8.0g", "%9.0g", "%8.0g", "%8.0g",
"%9s", "%8.0g", "%8.0g", "%9.0g", "%9.0g", "%9.0g", "%8.0g",
"%8.0g", "%9.0g", "%8.0g", "%8.0g", "%8.0g", "%9s", "%9s", "%9s",
"%9s", "%8.0g", "%8.0g", "%9s", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9s", "%10.0g", "%9.0g", "%9.0g", "%9s", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9s", "%10.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%10.0g", "%10.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%40.0g",
"%40.0g", "%10.0g", "%10.0g", "%10.0g", "%10.0g", "%10.0g", "%10.0g",
"%9.0g", "%9.0g", "%8.0g", "%8.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g"), types = c(3L, 44L, 3L, 44L,
65527L, 65528L, 65526L, 65526L, 65526L, 65527L, 65526L, 65526L,
65527L, 65526L, 65527L, 65526L, 65526L, 65527L, 65526L, 65527L,
65526L, 65526L, 65527L, 65526L, 65527L, 65530L, 65527L, 65527L,
65526L, 65526L, 65526L, 65527L, 65528L, 65526L, 65526L, 65527L,
65528L, 65530L, 65527L, 65530L, 65530L, 65530L, 65530L, 65530L,
65530L, 65530L, 65530L, 65527L, 65530L, 65529L, 3L, 65530L, 65530L,
65530L, 65530L, 65530L, 65530L, 65530L, 65527L, 65530L, 65530L,
65530L, 2L, 2L, 2L, 2L, 65530L, 65530L, 2L, 65527L, 65527L, 65527L,
65527L, 65527L, 65527L, 3L, 65530L, 65527L, 65527L, 2L, 65527L,
65527L, 65527L, 65527L, 65527L, 65527L, 3L, 65530L, 65527L, 65527L,
65527L, 65526L, 65530L, 65530L, 65530L, 65529L, 65530L, 65530L,
65530L, 65530L, 65530L, 65530L, 65530L, 65526L, 65526L, 65530L,
65530L, 65526L, 65526L, 65527L, 65527L, 65530L, 65530L, 65527L,
65527L, 65527L, 65527L, 65527L, 65527L, 65527L), val.labels = structure(c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "gspflag", "gspflag", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""), .Names = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "gspflag", "gspflag", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "")), var.labels = c("iso_3",
"Parent Country (Name)", "iso_3", "Host Country (Name)", "Reference year",
"Total # of Investments by origin in destination, in reference year",
"Total value of investments by origin in destination, in reference year",
"Average value of investments flown by origin in destination, in reference year",
"Total # of jobs created by year-origin-dest", "Average # of jobs created by investments by origin in destination, in reference ",
"Greenfield - Number of Projects in reference year year", "Greenfield - Total value by origin in destination, in reference year",
"Yearly average value of greenfield projects in the channel",
"Greenfield - Total # of jobs by origin in destination, in reference year",
"Average # of Jobs created by greenfield projects", "Co-location - Number of Projects in reference year year",
"Co-location - Total value by origin in destination, in reference year",
"Yearly average value of co-location projects in the channel",
"Co-location - Total # of jobs by origin in destination, in reference year",
"Average # of Jobs created by co-location projects", "Greenfield - Number of Projects in reference year year",
"Expansion - Total value by origin in destination, in reference year",
"Yearly average value of expansion projects in the channel",
"Expansion - Total # of jobs by origin in destination, in reference year",
"Average # of Jobs created by expansion projects", "Dummy = 1 if no investment in the year-cty_pair observation were recorded",
"Only gravity information - no investments. kept for merge with migration",
"simple distance (most populated cities, km)", "weighted distance (pop-wt, km)",
"Population, total in mn", "GDP (current US$)", "GDP per cap (current US$)",
"Area in sq. kms", "Population, total in mn", "GDP (current US$)",
"GDP per cap (current US$)", "Area in sq. kms", "1=Contiguity",
"1=Common religion", "1=Common official or primary language",
"1=Language is spoken by at least 9% of the population", "1=Common colonizer post 1945",
"1=Pair ever in colonial relationship", "1=Pair in colonial relationship post 1945",
"1=Common legal origins before transition", "1=Common legal origins after transition",
"1=Common currency", "nb of hours difference between ex and im",
"1=War", "Independence date if colony=1", "hegemon if cursib==1",
"1=Origin is current or former hegemon of destination", "1=Destination is current or former hegemon of origin",
"1=Trade from heg_o to colony", "1=Trade from colony to heg_d",
"1=Pair currently in colonial relationship", "1=Origin is GATT/WTO member",
"1=Destination is GATT/WTO member", "= 1 if a BIT ever existed between o and d",
"1=RTA (Source: WTO, 2015)", "1=FTA;2=Cust. Union;3=Common Market;4=Economic union (Baier & Bergstrand, 2009)",
"1=FTA (Source: Head, Mayer and Ries, 2010)", "origin legal system before transition",
"destination legal system before transition", "origin legal system after transition",
"destination legal system after transition", "1=ACP to EU", "1=EU to ACP",
"Income Category. H=high L=Low etc", "1 if income category = H",
"1 if income category = L", "1 if income category = LM", "1 if income category = UM",
"1 if fragile state", "1 if small state", "Regional 3-char Code - WB definition",
"1 if member of OECD", "1 if country is included among least developed countries",
"1 if no information on income level / region is available for FDI parent country",
"Income Category. H=high L=Low etc", "1 if income category = H",
"1 if income category = L", "1 if income category = LM", "1 if income category = UM",
"1 if fragile state", "1 if small state", "Regional 3-char Code - WB definition",
"1 if member of OECD", "1 if country is included among least developed countries",
"1 if no information on income level / region is available for FDI host country",
"simple distance between capitals (capitals, km)", "weighted distance (pop-wt, km) CES distances with theta=-1",
"1 if countries were or are the same country", "1=Pair ever in sibling relationship",
"1=Pair currently in sibling relationship", "severance year for pairs if sibling == 1",
"1=Pair ever in sibling relationship and conflict", "1=Common legal origin changed since transition",
"1=Non-reciprocal PTA ; 2=PTA (Source: Baier & Bergstrand, 2009)",
"1=Origin is donator", "1=Destination is donator", "report of changes in Rose data",
"report of changes in Rose data", "Cost of business start-up procedures (% of GNI per capita)",
"Cost of business start-up procedures (% of GNI per capita)",
"Start-up procedures to register a business (number)", "Start-up procedures to register a business (number)",
"Time required to start a business (days)", "Time required to start a business (days)",
"Days+Procs to start a business", "Days+Procs to start a business",
"1=Origin is a EU member", "1=Destination is a EU member", "group(iso_o iso_d)",
"", "", "", "", "", ""), version = 118L, label.table = structure(list(gspflag = structure(0:3, .Names = c("no gsp recorded in Rose",
"data directly from Rose", "changes in data from Rose", "Assumption that gsp continues after 1999"))), .Names = "gspflag"), expansion.fields = list(c("no_region_o", "note1", "Following small countries have no information because not independent countries: Anguilla; Christmas Island; Cocos and Keeling Islands; Cook Islands; Falkland Islands; French Guiana; Guadeloupe; Martinique; Montserrat; Netherland Antilles; Niue; Norfolk Island; Pitcairn; Reunion; Saint Helena, Ascension and Tristan d..; Saint Pierre and Miquelon; Tokelau; Wallis and Futuna; Western Sahara. notes no_region_o: Serbia and Montenegro has no information Beacuse is treated as separated countries. see variables no_flow and grav_noinv for details"), c("_dta", "note11", "Adesione OECD da fonte OECD "http://www.oecd.org/about/membersandpartners/list-oecd-member-countries.htm\""), c("_dta", "note10", "Classificazione per reddito e definizione fragile-small region Ufficiale World Bank - \"https://datahelpdesk.worldbank.org/knowledgebase/articles/906519-world-bank-country-and-lending-groups\""
), c("_dta", "note9", "Classificazione reddito e regioni come da WB World Development Indicators"), c("bit", "note1", "It is a dummy that states whether in a certain year there was a BIT between o and d and vice versa."), c("bit", "note0", "1"), c("_dta", "note8", "non ci sono duplicati nel dataset BIT perchè li ho eliminati. In questo modo, per ogni combinazione anno-cty_pair ho una sola osservazione. Mi serve per creare la variabile BIT nel do file principale"), c("_dta", "note7", "South sudan da problemi, è solo come destination country"), c("_dta", "note6", "I seguenti paesi hanno dati gravity ma non di investimenti (vedi variabile invYN e nota connessa). Li possiamo mantenere come cancellare. I paesi in questione sono: Anguilla - Netherland Antilles - Cocos and Keeling Islands - Cook Islands - Christmas Islands - Western Sahara - Falkland Islands - Faeroe Islands - Gibraltar French Guiana Kiribati - Marshall Islands - Northern Mariana Islands - Montserrat - Norfolk Islands - Niue - Nauru - Pitcairn - Palau - Saint Helena... - San Marino - Saint Pierre et Miquelon Tokelau - Tonga - Tuvalu - British Virgin Islands - Vanuatu - Wallis and Futuna."), c("_dta", "note5", "geo_dist database scaricato da \"http://www.cepii.fr/CEPII/en/bdd_modele/presentation.asp?id=6\" il 28 febbraio 2017. Updated 10 febbraio 2017"), c("_dta", "note4", "gravity database scaricato da \"http://www.cepii.fr/CEPII/en/bdd_modele/presentation.asp?id=8\" il 28 febbraio 2017. 
Updated 10 febbraio 2017"), c("gdp_d", "destring", "Characters removed were:"), c("pop_d","destring", "Characters removed were:"), c("gdp_o", "destring", "Characters removed were:"), c("pop_o", "destring", "Characters removed were:"), c("year", "destring", "Characters removed were:"), c("_dta","__JVarLab", "Indicator Name"), c("_dta", "ReS_i", "iso3"), c("_dta", "ReS_ver", "v.2"), c("_dta", "ReS_j", "iso_o"), c("_dta", "ReS_str", "1"), c("_dta", "ReS_Xij", "p"), c("fta_bb", "destring", "Characters removed were:"), c("_dta", "_TSitrvl", "1"), c("_dta", "_TSpanel", "ccode"),
c("_dta", "_TStvar", "year"), c("_dta", "_dta", "st"), c("_dta",
"st_id", "leadnew"), c("_dta", "st_d", "leadgone"), c("_dta",
"st_t0", "time0"), c("_dta", "st_t", "tenplus"), c("_dta",
"_lang_list", "default"), c("_dta", "_lang_c", "default"),
c("_dta", "st_bs", "1"), c("_dta", "st_s", "1"), c("_dta",
"st_o", "0"), c("_dta", "st_bd", "failure"), c("_dta", "st_bt",
"dur8099"), c("_dta", "st_ver", "2"), c("_dta", "tis", "year"
), c("_dta", "iis", "shcode56"), c("_dta", "__xi__Vars__To__Drop__",
"_Icontnt1_2 _Icontnt1_3 _Icontnt1_4 _Icontnt1_5 _Icontnt1_6 _Icontnt1_7"
), c("_dta", "__xi__Vars__Prefix__", "_I _I _I _I _I _I"),
c("no_flow", "note1", "Questa variabile mi dice se ho investimenti (=0) nell'osservazione year-cty_pair oppure se ho solo la coppia paese."
), c("no_flow", "note0", "1"), c("_dta", "note3", "codici ISO (versione 3166 da International Organisation for Standardization - \"https://www.iso.org\""
), c("_dta", "note2", "Investimenti FdiMarket from FDIIntelligence - \"http://www.fdiintelligence.com/\", FDIMarket Database"
), c("_dta", "note1", "Dataset Disaggregato a livello di singola impresa. Creato da Filippo Santi - Versione: 1 - Data: 9 Nov 2017."
), c("_dta", "note0", "11"), c("no_region_o", "note0", "2"
), c("no_region_o", "note2", "Following small countries have no information because not independent countries: Anguilla; Christmas Island; Cocos and Keeling Islands; Cook Islands; Falkland Islands; French Guiana; Guadeloupe; Martinique; Montserrat; Netherland Antilles; Niue; Norfolk Island; Pitcairn; Reunion; Saint Helena, Ascension and Tristan d..; Saint Pierre and Miquelon; Tokelau; Wallis and Futuna; Western Sahara. notes no_region_o: Serbia and Montenegro has no information Beacuse is treated as separated countries. see variables no_flow and grav_noinv for details"
), c("no_region_d", "note0", "1"), c("no_region_d", "note1",
"See notes to no_region_o")), byteorder = "LSF", .Names = c("iso_o",
"iso_d", "year", "all_num", "all_inv_tot", "all_inv_avg", "all_job_tot",
"all_job_avg", "greenf_num", "greenf_inv_tot", "greenf_inv_avg",
"greenf_job_tot", "greenf_job_avg", "coloc_num", "coloc_inv_tot",
"coloc_inv_avg", "coloc_job_tot", "coloc_job_avg", "expan_num",
"expan_inv_tot", "expan_inv_avg", "expan_job_tot", "expan_job_avg",
"no_flow", "grav_noinv", "dist", "distw", "contig", "comrelig",
"comlang_off", "comlang_ethno", "comcol", "colony", "comleg_pretrans",
"comcur", "tdiff", "conflict", "smctry", "subg", "same_reg",
"same_inc", "simil_inc"), n = 228L, row.names = c(NA, 10L), class = "data.frame")
下面是我使用的代码
# Compare the diameter and average path length (APL) of the same directed
# network when it is built with statnet (network + sna), with igraph, and
# with an igraph object converted from the statnet object via intergraph.
# Expects `fdi.edge.2003` (edge list data frame) and `fdi.attr.2003`
# (vertex attribute data frame) to exist in the workspace.

# Create network (STATNET)
library(network)
library(sna)
# Directed network from the edge list; ignore.eval = FALSE asks network()
# not to discard the edge values.
fdi.net.2003 <- network(fdi.edge.2003, matrix.type = "edgelist",
                        directed = TRUE,
                        ignore.eval = FALSE)

# Computing DIAMETER and APL
# In statnet, APL and DIAMETER are derived from the geodesic distances.
# inf.replace = NA marks the distance between unconnected pairs as NA, so
# those pairs can be dropped with na.rm = TRUE below.
# The geodesics must be computed on the 2003 network built above.
geod.2003 <- geodist(fdi.net.2003, inf.replace = NA)
# The length of the shortest path for all pairs of nodes.
geod.2003$gdist
# The number of shortest paths for all pairs of nodes.
geod.2003$counts

# Shortest Path Matrix
dist_mat <- geod.2003$gdist
hist(dist_mat)
diam <- max(dist_mat, na.rm = TRUE) # 6
# NOTE(review): the mean is taken over every non-NA cell of dist_mat;
# presumably this includes the diagonal (self-distances) -- confirm whether
# those cells should be excluded before comparing with other packages.
apl <- mean(dist_mat, na.rm = TRUE)
# (Adapted from https://dnac.ssri.duke.edu/rlabs/2017/02_descriptive_statistics.php)

# REPLICATE WITH IGRAPH
# Detach statnet first so that its functions do not clash with igraph
# functions of the same name.
detach(package:sna)
detach(package:network)
library(igraph)
# Here the vertex set comes from fdi.attr.2003, whereas the statnet object
# above was built from the edge list alone.
fdi.graph.2003 <- graph_from_data_frame(fdi.edge.2003, directed = TRUE,
                                        vertices = fdi.attr.2003)
diam2 <- diameter(fdi.graph.2003, directed = TRUE, unconnected = TRUE) # 7
identical(diam, diam2) # FALSE

# Using intergraph
library(intergraph)
# Convert the statnet object itself, so both diameters refer to one graph.
fdi.graph.2003.int <- asIgraph(fdi.net.2003)
class(fdi.graph.2003.int) # igraph
diam3 <- diameter(fdi.graph.2003.int, directed = TRUE, unconnected = TRUE) # 6
identical(diam, diam3) # TRUE
这只是一个例子。在进行团(clique)普查时,还出现了其他更值得关注的问题:使用 'network' 类的对象时,我得到的最大团的大小是 8;而使用 'igraph' 对象时,我得到的最大团的大小却达到了 17!
知道发生了什么事吗?
我知道这来晚了,我希望你已经找到了答案! Connections——一份由国际社会网络分析网络制作的期刊——有一篇文章指出了针对相同措施的不同软件包之间的差异。您可以在这里找到它:https://www.exeley.com/connections/doi/10.21307/connections-2017-002
我是 SNA 的新手,所以我不想自以为是专家,但在尝试查找 "correct" 结果时我会谨慎行事。 Indegree 和 outdegree 很简单,无论包如何,结果都应该相同。不过,我的理解是 clique、cluster 等比较模糊,最好的衡量标准在一定程度上取决于研究问题。
仅供参考:我建议先确保基本的网络结构是一致的——不同包之间的边数、入度和出度不应该有差异——然后再决定哪个包的方法与你的研究最相关。
祝你好运!