Python: 如何将list of list的元素转换为无向图?
Python: how to convert elements of a list of lists into an undirected graph?
我有一个程序可以检索 PubMed 出版物的列表,并希望构建一个共同作者图:对于每篇文章,将每位作者(如果尚不存在)添加为一个顶点,并在每对合作者之间添加一条无向边(如果该边已存在,则增加其权重)。
我已经完成了程序的第一部分,即检索每篇出版物的作者列表,并且了解到可以使用 NetworkX 库构建图(然后导出为 GraphML 供 Gephi 使用),但我不明白如何将 "list of lists" 转换为图。
下面是我的代码。非常感谢。
### if needed install the required modules
### python3 -m pip install biopython
### python3 -m pip install numpy
from Bio import Entrez
from Bio import Medline
Entrez.email = "rja@it.com"

# Search PubMed and keep the IDs of the matching articles.
handle = Entrez.esearch(db="pubmed", term='("lung diseases, interstitial"[MeSH Terms] NOT "pneumoconiosis"[MeSH Terms]) AND "artificial intelligence"[MeSH Terms] AND "humans"[MeSH Terms]', retmax="1000", sort="relevance", retmode="xml")
records = Entrez.read(handle)
handle.close()
ids = records['IdList']

# Fetch the matching articles in MEDLINE text format; h holds all of the
# articles and their sections.
h = Entrez.efetch(db='pubmed', id=ids, rettype='medline', retmode='text')
records = Medline.parse(h)

# Collect every distinct author name across all articles. A set makes the
# "already seen?" check O(1) instead of scanning a growing list.
unique_authors = set()
for record in records:
    # Default to an empty list for records without an 'AU' field: a string
    # default such as '?' would be iterated character by character and end
    # up in the result as a fake author named "?".
    unique_authors.update(record.get('AU', []))
h.close()

# Alphabetically sorted, non-repeating authors (these become the graph nodes).
authors = sorted(unique_authors)
print('Authors: {0}'.format(', '.join(authors)))
太棒了 - 代码可以运行,所以数据结构很清晰!作为一种方法,我们为 articles/authors 和 authors/co-authors 构建连接矩阵。
作者名单:
如果你想描述文章和作者之间的关系,我想你需要每篇文章的作者列表
# Build two structures from the parsed MEDLINE records:
#   author_lists - one author list per article, in article order
#   authors      - every distinct author name, sorted alphabetically
# NOTE(review): `records` looks like a one-shot iterator; if it has already
# been looped over, re-create it before running this - TODO confirm.
author_lists = []
for record in records:
    # Default to an empty list for records without an 'AU' field: a string
    # default such as '?' would be stored as a bare string in author_lists
    # and be counted as a fake author named "?".
    au = record.get('AU', [])
    author_lists.append(au)

# A set comprehension removes duplicates without an O(n) list scan per name.
authors = sorted({a for au in author_lists for a in au})
print(authors)
numpy、pandas 和 matplotlib - 正是我习惯的工作方式
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
AU = np.array(authors)          # unique author names as np-array
NA = AU.shape[0]                # number of authors
NL = len(author_lists)          # number of articles/author lists
# Articles generally have different numbers of authors, so the nested list is
# ragged. dtype=object is required for that: recent NumPy versions raise
# ValueError when asked to build a regular array from ragged sequences.
AUL = np.array(author_lists, dtype=object)
print('NA, NL', NA,NL)
连通性articles/authors
# Article/author connectivity matrix: one row per article, one column per
# author. CON[j, i] counts how often author AU[i] occurs in the author list
# of article j.
CON = np.zeros((NL, NA), dtype=int)
for j in range(NL):
    names = np.array(AUL[j])            # author list of article j
    hits = np.zeros((NA), dtype=int)
    for name in names:
        # The boolean mask marks this author's position(s) in AU.
        hits += (AU == name)
    CON[j, :] = hits

# ---- graphics ----
fig = plt.figure(figsize=(20, 10))
plt.spy(CON, marker='s', color='chartreuse', markersize=5)
plt.xlabel('Authors')
plt.ylabel('Articles')
plt.title('Authors of the articles', fontweight='bold')
plt.show()
连通性authors/co-authors,得到的矩阵是对称的
# Author/co-author connectivity matrix. Row j is the column-wise sum of all
# article rows of CON in which author j takes part, so ACON[j, k] accumulates
# the entries of author k over the articles that involve author j.
df = pd.DataFrame(CON)
ACON = np.zeros((NA, NA))
for j in range(NA):
    with_author_j = df.loc[df.iloc[:, j] > 0]     # articles involving author j
    ACON[j, :] = with_author_j.sum().to_numpy()   # per-author totals over them

# ---- graphics ----
fig = plt.figure(figsize=(10, 10))
plt.spy(ACON, marker='s', color='chartreuse', markersize=3)
plt.xlabel('Authors')
plt.ylabel('Authors')
plt.title('Authors that are co-authors', fontweight='bold')
plt.show()
对于使用 NetworkX 绘制的图,我认为你需要先明确想要表示什么,因为节点很多、可能性也很多(也许你可以贴一个例子?)。下面仅绘制了部分作者圈子。
import networkx as nx
def set_edges(Q):
    """Pair every entry of Q with its cyclic predecessor to form a ring.

    Q is a 1-D sequence of node indices. The result is an array with one
    row per entry of Q; each row is ``[Q[i], Q[i - 1]]``, with the first
    entry paired with the last. A single-entry Q is paired with itself.
    """
    rotated = np.roll(Q, shift=1)
    return np.vstack((Q, rotated)).T
# Draw the co-author circles of the first few authors as an undirected graph.
Q = nx.Graph()                  # a new Graph is already empty; no clear() needed
# Keep only the upper triangle (diagonal included) of the co-author matrix.
# NOTE(review): this presumes ACON is symmetric, so the lower triangle would
# only repeat the same pairs - confirm.
AT = np.triu(ACON)
fig = plt.figure(figsize=(7, 7))
# Only the first 9 authors are drawn; the bound is clamped so that data sets
# with fewer than 9 authors do not raise IndexError.
for k in range(min(9, AT.shape[0])):
    iA = np.argwhere(AT[k] > 0).ravel()   # column indices with AT[k] > 0
    Edges = set_edges(iA)                 # chain the involved nodes into a ring
    # No keyword arguments here: add_edges_from stores them as edge
    # attributes, so with_labels=True would not affect the drawing.
    Q.add_edges_from(Edges)
# with_labels is a drawing option and therefore belongs on nx.draw.
nx.draw(Q, alpha=0.5, with_labels=True)
plt.title('Co-author-ship', fontweight='bold')
plt.show()
我有一个程序可以检索 PubMed 出版物的列表,并希望构建一个共同作者图:对于每篇文章,将每位作者(如果尚不存在)添加为一个顶点,并在每对合作者之间添加一条无向边(如果该边已存在,则增加其权重)。
我已经完成了程序的第一部分,即检索每篇出版物的作者列表,并且了解到可以使用 NetworkX 库构建图(然后导出为 GraphML 供 Gephi 使用),但我不明白如何将 "list of lists" 转换为图。
下面是我的代码。非常感谢。
### if needed install the required modules
### python3 -m pip install biopython
### python3 -m pip install numpy
from Bio import Entrez
from Bio import Medline
Entrez.email = "rja@it.com"

# Search PubMed and keep the IDs of the matching articles.
handle = Entrez.esearch(db="pubmed", term='("lung diseases, interstitial"[MeSH Terms] NOT "pneumoconiosis"[MeSH Terms]) AND "artificial intelligence"[MeSH Terms] AND "humans"[MeSH Terms]', retmax="1000", sort="relevance", retmode="xml")
records = Entrez.read(handle)
handle.close()
ids = records['IdList']

# Fetch the matching articles in MEDLINE text format; h holds all of the
# articles and their sections.
h = Entrez.efetch(db='pubmed', id=ids, rettype='medline', retmode='text')
records = Medline.parse(h)

# Collect every distinct author name across all articles. A set makes the
# "already seen?" check O(1) instead of scanning a growing list.
unique_authors = set()
for record in records:
    # Default to an empty list for records without an 'AU' field: a string
    # default such as '?' would be iterated character by character and end
    # up in the result as a fake author named "?".
    unique_authors.update(record.get('AU', []))
h.close()

# Alphabetically sorted, non-repeating authors (these become the graph nodes).
authors = sorted(unique_authors)
print('Authors: {0}'.format(', '.join(authors)))
太棒了 - 代码可以运行,所以数据结构很清晰!作为一种方法,我们为 articles/authors 和 authors/co-authors 构建连接矩阵。
作者名单: 如果你想描述文章和作者之间的关系,我想你需要每篇文章的作者列表
# Build two structures from the parsed MEDLINE records:
#   author_lists - one author list per article, in article order
#   authors      - every distinct author name, sorted alphabetically
# NOTE(review): `records` looks like a one-shot iterator; if it has already
# been looped over, re-create it before running this - TODO confirm.
author_lists = []
for record in records:
    # Default to an empty list for records without an 'AU' field: a string
    # default such as '?' would be stored as a bare string in author_lists
    # and be counted as a fake author named "?".
    au = record.get('AU', [])
    author_lists.append(au)

# A set comprehension removes duplicates without an O(n) list scan per name.
authors = sorted({a for au in author_lists for a in au})
print(authors)
numpy、pandas 和 matplotlib - 正是我习惯的工作方式
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
AU = np.array(authors)          # unique author names as np-array
NA = AU.shape[0]                # number of authors
NL = len(author_lists)          # number of articles/author lists
# Articles generally have different numbers of authors, so the nested list is
# ragged. dtype=object is required for that: recent NumPy versions raise
# ValueError when asked to build a regular array from ragged sequences.
AUL = np.array(author_lists, dtype=object)
print('NA, NL', NA,NL)
连通性articles/authors
# Article/author connectivity matrix: one row per article, one column per
# author. CON[j, i] counts how often author AU[i] occurs in the author list
# of article j.
CON = np.zeros((NL, NA), dtype=int)
for j in range(NL):
    names = np.array(AUL[j])            # author list of article j
    hits = np.zeros((NA), dtype=int)
    for name in names:
        # The boolean mask marks this author's position(s) in AU.
        hits += (AU == name)
    CON[j, :] = hits

# ---- graphics ----
fig = plt.figure(figsize=(20, 10))
plt.spy(CON, marker='s', color='chartreuse', markersize=5)
plt.xlabel('Authors')
plt.ylabel('Articles')
plt.title('Authors of the articles', fontweight='bold')
plt.show()
连通性authors/co-authors,得到的矩阵是对称的
# Author/co-author connectivity matrix. Row j is the column-wise sum of all
# article rows of CON in which author j takes part, so ACON[j, k] accumulates
# the entries of author k over the articles that involve author j.
df = pd.DataFrame(CON)
ACON = np.zeros((NA, NA))
for j in range(NA):
    with_author_j = df.loc[df.iloc[:, j] > 0]     # articles involving author j
    ACON[j, :] = with_author_j.sum().to_numpy()   # per-author totals over them

# ---- graphics ----
fig = plt.figure(figsize=(10, 10))
plt.spy(ACON, marker='s', color='chartreuse', markersize=3)
plt.xlabel('Authors')
plt.ylabel('Authors')
plt.title('Authors that are co-authors', fontweight='bold')
plt.show()
对于使用 NetworkX 绘制的图,我认为你需要先明确想要表示什么,因为节点很多、可能性也很多(也许你可以贴一个例子?)。下面仅绘制了部分作者圈子。
import networkx as nx
def set_edges(Q):
    """Pair every entry of Q with its cyclic predecessor to form a ring.

    Q is a 1-D sequence of node indices. The result is an array with one
    row per entry of Q; each row is ``[Q[i], Q[i - 1]]``, with the first
    entry paired with the last. A single-entry Q is paired with itself.
    """
    rotated = np.roll(Q, shift=1)
    return np.vstack((Q, rotated)).T
# Draw the co-author circles of the first few authors as an undirected graph.
Q = nx.Graph()                  # a new Graph is already empty; no clear() needed
# Keep only the upper triangle (diagonal included) of the co-author matrix.
# NOTE(review): this presumes ACON is symmetric, so the lower triangle would
# only repeat the same pairs - confirm.
AT = np.triu(ACON)
fig = plt.figure(figsize=(7, 7))
# Only the first 9 authors are drawn; the bound is clamped so that data sets
# with fewer than 9 authors do not raise IndexError.
for k in range(min(9, AT.shape[0])):
    iA = np.argwhere(AT[k] > 0).ravel()   # column indices with AT[k] > 0
    Edges = set_edges(iA)                 # chain the involved nodes into a ring
    # No keyword arguments here: add_edges_from stores them as edge
    # attributes, so with_labels=True would not affect the drawing.
    Q.add_edges_from(Edges)
# with_labels is a drawing option and therefore belongs on nx.draw.
nx.draw(Q, alpha=0.5, with_labels=True)
plt.title('Co-author-ship', fontweight='bold')
plt.show()