从 Python Visdcc 网络图获取 ClickData
Get ClickData from Python Visdcc Network Graph
我正在努力构建所有宇航员的网络图,这些宇航员通过他们参与的任务连接起来。
到目前为止,这是我的代码:
#Import packages
import pandas as pd
import os
import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
import requests
import visdcc
#Download the astronaut database from SuperCluster
astronaut_db_url = 'https://supercluster-iadb.s3.us-east-2.amazonaws.com/adb.json'
# A timeout keeps a stalled connection from hanging start-up, and an HTTP
# error status is raised here instead of surfacing later as a JSON decode failure.
_response = requests.get(astronaut_db_url, timeout=30)
_response.raise_for_status()
astronauts_db = _response.json()

#Make dataframes
df1 = pd.json_normalize(astronauts_db['astronauts'])
df2 = pd.json_normalize(astronauts_db['missions'])

#Grab columns (copied so later edits never write into df1/df2)
df_astro = df1[['_id', 'astroNumber', 'awards', 'name', 'gender', 'inSpace',
                'overallNumber', 'spacewalkCount', 'species', 'speciesGroup',
                'totalMinutesInSpace', 'totalSecondsSpacewalking',
                'lastLaunchDate.utc']].copy()
df_miss = df2[['_id', 'astronauts', 'keywords', 'name',
               'seriesName', 'shortDescription', 'vagueLaunchDate',
               'landDate.utc', 'launchDate.utc']].copy()

#Change column names
df_astro = df_astro.rename(columns={'_id': 'astronaut_id'})

#Reduce each astronaut's award records to a list of award titles
df_awards = df_astro[['astronaut_id', 'awards']].copy()
df_awards['awards'] = df_awards['awards'].apply(
    # A missing awards entry (anything that is not a list) becomes an empty list
    # instead of raising while iterating.
    lambda awards: [award['title'] for award in awards] if isinstance(awards, list) else []
)

#Swap the raw awards column for the list of titles
df_astro = pd.merge(
    df_astro.drop(columns=['awards']),
    df_awards,
    how='left',
    on=['astronaut_id'],
)

#Change column names
df_miss = df_miss.rename(columns={'_id': 'mission_id'})

#Expand df to have one row per (mission, astronaut) pair
df_test = df_miss.explode(['astronauts']).reset_index(drop=True)

#Pull out the astronaut records from JSON format
astronauts = pd.json_normalize(df_test['astronauts'])

#Add the astronaut fields back onto the mission rows
df_miss = pd.concat([df_test, astronauts], axis=1)

#Change column names
df_miss = df_miss.rename(columns={'_id': 'astronaut_id'})
df_miss = df_miss.drop(columns=['astronauts'])

#Cleaning time/day variables (each timestamp column is parsed only once)
_launch_utc = pd.to_datetime(df_miss['launchDate.utc'])
_land_utc = pd.to_datetime(df_miss['landDate.utc'])
df_miss['launch_time'] = _launch_utc.dt.time
df_miss['land_time'] = _land_utc.dt.time
df_miss['launch_date'] = df_miss['vagueLaunchDate']
df_miss['land_date'] = _land_utc.dt.date
df_miss = df_miss.drop(columns=['vagueLaunchDate', 'landDate.utc', 'launchDate.utc'])

#Join astronaut database with mission database
df_full = pd.merge(df_miss, df_astro, how='left', on=['astronaut_id'])

# Number of Awards per Astronaut
df_full['num_awards'] = df_full['awards'].str.len()
df_full = df_full.drop(columns=['lastLaunchDate.utc'])

# The merge suffixes the two clashing 'name' columns: _x is the mission, _y the astronaut.
df_full = df_full.rename(columns={'name_x': 'mission_name', 'name_y': 'astronaut_name'})
#Get the countries
from bs4 import BeautifulSoup
#!pip install selenium
from selenium import webdriver
#!pip install webdriver_manager
from webdriver_manager.chrome import ChromeDriverManager
import time
from selenium.webdriver.chrome.options import Options
data = []
url = 'https://www.supercluster.com/astronauts?ascending=false&limit=5000&list=true&sort=launch%20order'

options = Options()
options.add_argument("--headless")
# NOTE(review): newer Selenium releases may expect the driver path through a
# Service object rather than positionally — confirm against the installed version.
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
try:
    driver.maximize_window()
    driver.get(url)
    # Fixed pause before capturing the HTML — presumably to let the page's
    # scripts finish rendering the astronaut list.
    time.sleep(10)
    soup = BeautifulSoup(driver.page_source, 'lxml')
finally:
    # quit() ends the whole WebDriver session, and the finally clause makes
    # sure that happens even when loading or parsing the page raises.
    driver.quit()

# Collect one [name, country] pair per astronaut cell; country stays None
# when the cell has no country element.
tags = soup.select('.astronaut_cell.x')
for item in tags:
    name_tag = item.select_one('.bau.astronaut_cell__title.bold.mr05')
    if name_tag is None:
        # Without a title there is no name to join on later, so skip the cell
        # instead of failing on None.get_text().
        continue
    name = name_tag.get_text()
    country = item.select_one('.mouseover__contents.rel.py05.px075.bau.caps.small.ac')
    if country:
        country = country.get_text()
    data.append([name, country])
cols = ['name', 'country']
df = pd.DataFrame(data, columns=cols)

# Scraped names are split on ", " and reassembled as "First Last" so they can
# be joined against astronaut_name.
_name_parts = df['name'].str.split(", ")
_last_names = _name_parts.str[0]
_first_names = _name_parts.str[1]
# A name without a ", " separator has no second part; fall back to the single
# token instead of letting the concatenation collapse to NaN.
df['astronaut_name'] = (_first_names + ' ' + _last_names).fillna(_last_names)
df = df[['country', 'astronaut_name']]

#Join country onto full astro df
astro_db = pd.merge(df_full, df, how='left', on=['astronaut_name'])
# The launch year is the first four characters of the launch date string.
astro_db['launch_year'] = astro_db['launch_date'].str[0:4].astype(int)

# Dropdown choices: count rows per country through a helper column of ones.
astro_db['ones'] = 1
country_condensed = (
    astro_db[['country', 'ones']]
    .groupby(['country'])
    .sum()
    .reset_index()
)
# Keep only countries that appear on more than one row.
country_condensed = country_condensed.loc[country_condensed['ones'] > 1]
country_choices = sorted(country_condensed['country'].astype('str').unique())

# Distinct launch years, used for the slider bounds.
year_choices = astro_db['launch_year'].unique()
app = dash.Dash(__name__, assets_folder=os.path.join(os.curdir, "assets"))
server = app.server

# Page layout: a country dropdown and a launch-year range slider side by side,
# with the network graph underneath.
app.layout = html.Div([
    dbc.Row([
        dbc.Col([
            dcc.Dropdown(
                id='dropdown1',
                style={'color': 'black'},
                options=[{'label': i, 'value': i} for i in country_choices],
                # Start on the last entry of the choices list.
                value=country_choices[-1],
            )
        ], width=6),
        dbc.Col([
            dcc.RangeSlider(
                id='range_slider',
                min=year_choices.min(),
                max=year_choices.max(),
                step=1,
                value=[2010, year_choices.max()],
                allowCross=False,
                pushable=2,
                tooltip={"placement": "bottom", "always_visible": True},
                # One labelled mark per decade, 1950 through 2020.
                marks={year: str(year) for year in range(1950, 2021, 10)},
            ),
        ], width=6),
        dbc.Col([
            visdcc.Network(
                id='ng',
                options=dict(
                    height='600px',
                    width='100%',
                    physics={'barnesHut': {'avoidOverlap': 0.5}},
                    # NOTE(review): vis-network appears to document maxVelocity and
                    # stabilization as physics sub-options and scaling under
                    # nodes/edges — confirm these top-level keys are honoured.
                    maxVelocity=0,
                    stabilization={
                        # Real booleans: the strings 'true'/'false' are both
                        # truthy once they reach JavaScript.
                        'enabled': True,
                        'iterations': 15,
                        'updateInterval': 50,
                        'onlyDynamicEdges': False,
                        'fit': True,
                    },
                    scaling='value',
                ),
            )
        ], width=12),
    ])
])
#Configure callback for network graph
@app.callback(
    Output('ng', 'data'),
    Input('dropdown1', 'value'),
    Input('range_slider', 'value')
)
def network(dd1, range_slider1):
    """Build the node and edge lists for the mission/astronaut network.

    Args:
        dd1: Country selected in the dropdown; only rows of ``astro_db``
            whose ``country`` equals it are used.
        range_slider1: Two-item sequence ``[first_year, last_year]``; only
            rows whose ``launch_year`` lies inside it (inclusive) are used.

    Returns:
        A dict ``{'nodes': [...], 'edges': [...]}``. Missions are blue nodes,
        astronauts are grey nodes, and each edge links a mission to an
        astronaut who took part in it.
    """
    first_year, last_year = range_slider1
    in_scope = (
        (astro_db['country'] == dd1)
        & (astro_db['launch_year'] >= first_year)
        & (astro_db['launch_year'] <= last_year)
    )
    # Work on a fresh frame so the shared astro_db is never modified.
    # Incomplete pairs cannot form an edge, and repeated pairs would produce
    # duplicate edge ids, so both are removed up front.
    new_df = (
        astro_db.loc[in_scope, ['mission_name', 'astronaut_name']]
        .rename(columns={'mission_name': 'Source', 'astronaut_name': 'Target'})
        .dropna()
        .drop_duplicates()
    )

    def make_node(node_name, colour):
        return {
            'id': node_name,
            'label': node_name,
            'shape': 'dot',
            'color': colour,
            'size': 15
        }

    sources = new_df['Source'].unique().tolist()
    # Built once so the membership test below is a set lookup per name.
    source_names = set(sources)
    # A name that is already a mission node stays blue and is not added again.
    targets = [
        node_name
        for node_name in new_df['Target'].unique().tolist()
        if node_name not in source_names
    ]
    nodes = (
        [make_node(node_name, '#626ffb') for node_name in sources]
        + [make_node(node_name, 'grey') for node_name in targets]
    )

    #Create edges from df
    edges = [
        {
            'id': f"{source}__{target}",
            'from': source,
            'to': target,
            'width': 2
        }
        for source, target in zip(new_df['Source'], new_df['Target'])
    ]

    return {'nodes': nodes, 'edges': edges}
# Start the Dash server only when this file is executed as a script.
if __name__ == "__main__":
    app.run_server()
我希望能够单击网络图中的蓝色节点并获取该节点的点击数据,类似于此处概述的过程:https://dash.plotly.com/interactive-graphing。不过,这个过程看起来只适用于 dcc.Graph 对象,而我的网络图是用 visdcc.Network 对象构建的。
我是不是漏掉了什么?是不是还可以通过这种方式从网络图上抓取点击数据呢?如有任何帮助,我们将不胜感激!
一种方法是使用 visdcc 文档中提供的示例:
# Example layout: a network whose selection starts empty, plus two
# placeholders that show the selected nodes and edges.
app.layout = html.Div(
    children=[
        visdcc.Network(
            id='net',
            selection={'nodes': [], 'edges': []},
            options={'height': '600px', 'width': '100%'},
        ),
        html.Div(id='nodes'),
        html.Div(id='edges'),
    ]
)
@app.callback(
    Output('nodes', 'children'),
    [Input('net', 'selection')])
def myfun(x):
    """Describe the first selected node of the network.

    Args:
        x: The network's ``selection`` value, a dict with a ``'nodes'`` list.
            ``None`` or a dict without that key is treated as "nothing selected".

    Returns:
        The text ``'Selected nodes : '``, followed by the first selected node
        id when there is one.
    """
    # Guard against an unset selection so the callback never raises on None.
    selected_nodes = (x or {}).get('nodes') or []
    s = 'Selected nodes : '
    if selected_nodes:
        s += str(selected_nodes[0])
    return s
@app.callback(
    Output('edges', 'children'),
    [Input('net', 'selection')])
def myfun(x):
    """List every selected edge of the network.

    Args:
        x: The network's ``selection`` value, a dict with an ``'edges'`` list.
            ``None`` or a dict without that key is treated as "nothing selected".

    Returns:
        The text ``'Selected edges : '`` when no edge is selected; otherwise a
        list holding that text followed by one ``html.Div`` per selected edge.
    """
    # Guard against an unset selection so the callback never raises on None.
    selected_edges = (x or {}).get('edges') or []
    s = 'Selected edges : '
    if selected_edges:
        s = [s] + [html.Div(i) for i in selected_edges]
    return s
这并不是为您的代码量身定制的,但此处的变量 x 将包含图中选定的节点和边。然后,您可以使用与它们相关的任何信息,并将其显示在某个 div 中。
我正在努力构建所有宇航员的网络图,这些宇航员通过他们参与的任务连接起来。
到目前为止,这是我的代码:
#Import packages
import pandas as pd
import os
import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
import requests
import visdcc
#Download the astronaut database from SuperCluster
astronaut_db_url = 'https://supercluster-iadb.s3.us-east-2.amazonaws.com/adb.json'
# A timeout keeps a stalled connection from hanging start-up, and an HTTP
# error status is raised here instead of surfacing later as a JSON decode failure.
_response = requests.get(astronaut_db_url, timeout=30)
_response.raise_for_status()
astronauts_db = _response.json()

#Make dataframes
df1 = pd.json_normalize(astronauts_db['astronauts'])
df2 = pd.json_normalize(astronauts_db['missions'])

#Grab columns (copied so later edits never write into df1/df2)
df_astro = df1[['_id', 'astroNumber', 'awards', 'name', 'gender', 'inSpace',
                'overallNumber', 'spacewalkCount', 'species', 'speciesGroup',
                'totalMinutesInSpace', 'totalSecondsSpacewalking',
                'lastLaunchDate.utc']].copy()
df_miss = df2[['_id', 'astronauts', 'keywords', 'name',
               'seriesName', 'shortDescription', 'vagueLaunchDate',
               'landDate.utc', 'launchDate.utc']].copy()

#Change column names
df_astro = df_astro.rename(columns={'_id': 'astronaut_id'})

#Reduce each astronaut's award records to a list of award titles
df_awards = df_astro[['astronaut_id', 'awards']].copy()
df_awards['awards'] = df_awards['awards'].apply(
    # A missing awards entry (anything that is not a list) becomes an empty list
    # instead of raising while iterating.
    lambda awards: [award['title'] for award in awards] if isinstance(awards, list) else []
)

#Swap the raw awards column for the list of titles
df_astro = pd.merge(
    df_astro.drop(columns=['awards']),
    df_awards,
    how='left',
    on=['astronaut_id'],
)

#Change column names
df_miss = df_miss.rename(columns={'_id': 'mission_id'})

#Expand df to have one row per (mission, astronaut) pair
df_test = df_miss.explode(['astronauts']).reset_index(drop=True)

#Pull out the astronaut records from JSON format
astronauts = pd.json_normalize(df_test['astronauts'])

#Add the astronaut fields back onto the mission rows
df_miss = pd.concat([df_test, astronauts], axis=1)

#Change column names
df_miss = df_miss.rename(columns={'_id': 'astronaut_id'})
df_miss = df_miss.drop(columns=['astronauts'])

#Cleaning time/day variables (each timestamp column is parsed only once)
_launch_utc = pd.to_datetime(df_miss['launchDate.utc'])
_land_utc = pd.to_datetime(df_miss['landDate.utc'])
df_miss['launch_time'] = _launch_utc.dt.time
df_miss['land_time'] = _land_utc.dt.time
df_miss['launch_date'] = df_miss['vagueLaunchDate']
df_miss['land_date'] = _land_utc.dt.date
df_miss = df_miss.drop(columns=['vagueLaunchDate', 'landDate.utc', 'launchDate.utc'])

#Join astronaut database with mission database
df_full = pd.merge(df_miss, df_astro, how='left', on=['astronaut_id'])

# Number of Awards per Astronaut
df_full['num_awards'] = df_full['awards'].str.len()
df_full = df_full.drop(columns=['lastLaunchDate.utc'])

# The merge suffixes the two clashing 'name' columns: _x is the mission, _y the astronaut.
df_full = df_full.rename(columns={'name_x': 'mission_name', 'name_y': 'astronaut_name'})
#Get the countries
from bs4 import BeautifulSoup
#!pip install selenium
from selenium import webdriver
#!pip install webdriver_manager
from webdriver_manager.chrome import ChromeDriverManager
import time
from selenium.webdriver.chrome.options import Options
data = []
url = 'https://www.supercluster.com/astronauts?ascending=false&limit=5000&list=true&sort=launch%20order'

options = Options()
options.add_argument("--headless")
# NOTE(review): newer Selenium releases may expect the driver path through a
# Service object rather than positionally — confirm against the installed version.
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
try:
    driver.maximize_window()
    driver.get(url)
    # Fixed pause before capturing the HTML — presumably to let the page's
    # scripts finish rendering the astronaut list.
    time.sleep(10)
    soup = BeautifulSoup(driver.page_source, 'lxml')
finally:
    # quit() ends the whole WebDriver session, and the finally clause makes
    # sure that happens even when loading or parsing the page raises.
    driver.quit()

# Collect one [name, country] pair per astronaut cell; country stays None
# when the cell has no country element.
tags = soup.select('.astronaut_cell.x')
for item in tags:
    name_tag = item.select_one('.bau.astronaut_cell__title.bold.mr05')
    if name_tag is None:
        # Without a title there is no name to join on later, so skip the cell
        # instead of failing on None.get_text().
        continue
    name = name_tag.get_text()
    country = item.select_one('.mouseover__contents.rel.py05.px075.bau.caps.small.ac')
    if country:
        country = country.get_text()
    data.append([name, country])
cols = ['name', 'country']
df = pd.DataFrame(data, columns=cols)

# Scraped names are split on ", " and reassembled as "First Last" so they can
# be joined against astronaut_name.
_name_parts = df['name'].str.split(", ")
_last_names = _name_parts.str[0]
_first_names = _name_parts.str[1]
# A name without a ", " separator has no second part; fall back to the single
# token instead of letting the concatenation collapse to NaN.
df['astronaut_name'] = (_first_names + ' ' + _last_names).fillna(_last_names)
df = df[['country', 'astronaut_name']]

#Join country onto full astro df
astro_db = pd.merge(df_full, df, how='left', on=['astronaut_name'])
# The launch year is the first four characters of the launch date string.
astro_db['launch_year'] = astro_db['launch_date'].str[0:4].astype(int)

# Dropdown choices: count rows per country through a helper column of ones.
astro_db['ones'] = 1
country_condensed = (
    astro_db[['country', 'ones']]
    .groupby(['country'])
    .sum()
    .reset_index()
)
# Keep only countries that appear on more than one row.
country_condensed = country_condensed.loc[country_condensed['ones'] > 1]
country_choices = sorted(country_condensed['country'].astype('str').unique())

# Distinct launch years, used for the slider bounds.
year_choices = astro_db['launch_year'].unique()
app = dash.Dash(__name__, assets_folder=os.path.join(os.curdir, "assets"))
server = app.server

# Page layout: a country dropdown and a launch-year range slider side by side,
# with the network graph underneath.
app.layout = html.Div([
    dbc.Row([
        dbc.Col([
            dcc.Dropdown(
                id='dropdown1',
                style={'color': 'black'},
                options=[{'label': i, 'value': i} for i in country_choices],
                # Start on the last entry of the choices list.
                value=country_choices[-1],
            )
        ], width=6),
        dbc.Col([
            dcc.RangeSlider(
                id='range_slider',
                min=year_choices.min(),
                max=year_choices.max(),
                step=1,
                value=[2010, year_choices.max()],
                allowCross=False,
                pushable=2,
                tooltip={"placement": "bottom", "always_visible": True},
                # One labelled mark per decade, 1950 through 2020.
                marks={year: str(year) for year in range(1950, 2021, 10)},
            ),
        ], width=6),
        dbc.Col([
            visdcc.Network(
                id='ng',
                options=dict(
                    height='600px',
                    width='100%',
                    physics={'barnesHut': {'avoidOverlap': 0.5}},
                    # NOTE(review): vis-network appears to document maxVelocity and
                    # stabilization as physics sub-options and scaling under
                    # nodes/edges — confirm these top-level keys are honoured.
                    maxVelocity=0,
                    stabilization={
                        # Real booleans: the strings 'true'/'false' are both
                        # truthy once they reach JavaScript.
                        'enabled': True,
                        'iterations': 15,
                        'updateInterval': 50,
                        'onlyDynamicEdges': False,
                        'fit': True,
                    },
                    scaling='value',
                ),
            )
        ], width=12),
    ])
])
#Configure callback for network graph
@app.callback(
    Output('ng', 'data'),
    Input('dropdown1', 'value'),
    Input('range_slider', 'value')
)
def network(dd1, range_slider1):
    """Build the node and edge lists for the mission/astronaut network.

    Args:
        dd1: Country selected in the dropdown; only rows of ``astro_db``
            whose ``country`` equals it are used.
        range_slider1: Two-item sequence ``[first_year, last_year]``; only
            rows whose ``launch_year`` lies inside it (inclusive) are used.

    Returns:
        A dict ``{'nodes': [...], 'edges': [...]}``. Missions are blue nodes,
        astronauts are grey nodes, and each edge links a mission to an
        astronaut who took part in it.
    """
    first_year, last_year = range_slider1
    in_scope = (
        (astro_db['country'] == dd1)
        & (astro_db['launch_year'] >= first_year)
        & (astro_db['launch_year'] <= last_year)
    )
    # Work on a fresh frame so the shared astro_db is never modified.
    # Incomplete pairs cannot form an edge, and repeated pairs would produce
    # duplicate edge ids, so both are removed up front.
    new_df = (
        astro_db.loc[in_scope, ['mission_name', 'astronaut_name']]
        .rename(columns={'mission_name': 'Source', 'astronaut_name': 'Target'})
        .dropna()
        .drop_duplicates()
    )

    def make_node(node_name, colour):
        return {
            'id': node_name,
            'label': node_name,
            'shape': 'dot',
            'color': colour,
            'size': 15
        }

    sources = new_df['Source'].unique().tolist()
    # Built once so the membership test below is a set lookup per name.
    source_names = set(sources)
    # A name that is already a mission node stays blue and is not added again.
    targets = [
        node_name
        for node_name in new_df['Target'].unique().tolist()
        if node_name not in source_names
    ]
    nodes = (
        [make_node(node_name, '#626ffb') for node_name in sources]
        + [make_node(node_name, 'grey') for node_name in targets]
    )

    #Create edges from df
    edges = [
        {
            'id': f"{source}__{target}",
            'from': source,
            'to': target,
            'width': 2
        }
        for source, target in zip(new_df['Source'], new_df['Target'])
    ]

    return {'nodes': nodes, 'edges': edges}
# Start the Dash server only when this file is executed as a script.
if __name__ == "__main__":
    app.run_server()
我希望能够单击网络图中的蓝色节点并获取该节点的点击数据,类似于此处概述的过程:https://dash.plotly.com/interactive-graphing。不过,这个过程看起来只适用于 dcc.Graph 对象,而我的网络图是用 visdcc.Network 对象构建的。
我是不是漏掉了什么?是不是还可以通过这种方式从网络图上抓取点击数据呢?如有任何帮助,我们将不胜感激!
一种方法是使用 visdcc 文档中提供的示例:
# Example layout: a network whose selection starts empty, plus two
# placeholders that show the selected nodes and edges.
app.layout = html.Div(
    children=[
        visdcc.Network(
            id='net',
            selection={'nodes': [], 'edges': []},
            options={'height': '600px', 'width': '100%'},
        ),
        html.Div(id='nodes'),
        html.Div(id='edges'),
    ]
)
@app.callback(
    Output('nodes', 'children'),
    [Input('net', 'selection')])
def myfun(x):
    """Describe the first selected node of the network.

    Args:
        x: The network's ``selection`` value, a dict with a ``'nodes'`` list.
            ``None`` or a dict without that key is treated as "nothing selected".

    Returns:
        The text ``'Selected nodes : '``, followed by the first selected node
        id when there is one.
    """
    # Guard against an unset selection so the callback never raises on None.
    selected_nodes = (x or {}).get('nodes') or []
    s = 'Selected nodes : '
    if selected_nodes:
        s += str(selected_nodes[0])
    return s
@app.callback(
    Output('edges', 'children'),
    [Input('net', 'selection')])
def myfun(x):
    """List every selected edge of the network.

    Args:
        x: The network's ``selection`` value, a dict with an ``'edges'`` list.
            ``None`` or a dict without that key is treated as "nothing selected".

    Returns:
        The text ``'Selected edges : '`` when no edge is selected; otherwise a
        list holding that text followed by one ``html.Div`` per selected edge.
    """
    # Guard against an unset selection so the callback never raises on None.
    selected_edges = (x or {}).get('edges') or []
    s = 'Selected edges : '
    if selected_edges:
        s = [s] + [html.Div(i) for i in selected_edges]
    return s
这并不是为您的代码量身定制的,但此处的变量 x 将包含图中选定的节点和边。然后,您可以使用与它们相关的任何信息,并将其显示在某个 div 中。