从 GitHub 加载 JSON 数据到 Python 和 Streamlit

Load JSON Data from GitHub in Python and Streamlit

大家好,先在此感谢各位的帮助。

我一直在使用 Streamlit 和 Python 进行开发,现在我想把这些想法全部部署到 Heroku 上。

这是我第一次在 Heroku 上部署应用程序。我想从 GitHub 加载 JSON 文件,因为我想每周用 StatsBomb 的数据更新应用程序。

到目前为止,我一直是从本地电脑加载 JSON 文件,代码如下:

import streamlit as st
import pandas as pd
import json
import os
from pandas import json_normalize
from flatten_json import flatten

import matplotlib.pyplot as plt
from matplotlib.patches import Arc
import numpy as np
from PIL import Image
import urllib, json

### NAME OF THE APP ###
x = st.sidebar.header(' Analytics')  # heading shown at the top of the sidebar

### SEASON SELECTOR ###
# Maps each season label offered in the sidebar to the JSON file name that is
# later appended to the .../matches/11/ directory path.
SEASON_FILES = {
    'Temporada 2018-2019': '4.json',
    'Temporada 2017-2018': '1.json',
    'Temporada 2016-2017': '2.json',
}

# file_to_charge2 holds the human-readable season label (also used in the subheader).
file_to_charge2 = st.sidebar.selectbox('temporada', tuple(SEASON_FILES))

# Fix: the 2017-2018 branch used to assign '1.json' to file_to_charge2 (the label)
# instead of file_to_charge, which left file_to_charge undefined for that season.
file_to_charge = SEASON_FILES[file_to_charge2]


### MATCH SELECTOR ###

# The home and away selectors offer exactly the same team names, in the same order.
TEAMS = (
    'Athletic Bilbao', 'Atlético Madrid', 'Barcelona', 'Celta Vigo', 'Deportivo Alavés',
    'Deportivo La Coruna', 'Eibar', 'Espanyol', 'Getafe', 'Girona', 'Granada', 'Las Palmas',
    'Leganés', 'Levante', 'Málaga', 'Osasuna', 'Rayo Vallecano', 'Real Betis', 'Real Madrid',
    'Real Sociedad', 'Real Valladolid', 'Sevilla', 'Sporting Gijón', 'Valencia', 'Villarreal',
)
home_team = st.sidebar.selectbox('Equipo juega en casa:', TEAMS)
away_team = st.sidebar.selectbox('Equipo fuera de casa:', TEAMS)
# Leftover experiment, not executed:
#json_normalize(my_data3, sep='_').assign(match_id=file_name[:-5])



### LOAD THE JSON FILE FROM COMPUTER ###

st.title('> Datos por temporada:')

# Path of the selected season's matches file inside the local open-data-master folder.
matches_path = os.path.expanduser(
    '~/Desktop/DATOS/open-data-master/data/matches/11/' + file_to_charge
)

# Fix: the original passed open(...) straight to json.load and never closed the
# handle; the context manager closes the file as soon as parsing finishes.
with open(matches_path, 'r', encoding='utf-8') as matches_file:
    dfpartidos = json.load(matches_file)



### DATAFRAME OF THE SEASON ###
# Columns kept from the flattened match records; the dotted names are the
# column labels pd.json_normalize produces for nested keys.
FIELDS = [
    'match_id',
    'match_week',
    "home_team.home_team_name",
    'away_team.away_team_name',
    'home_score',
    'away_score',
    'referee.name',
]
dfdef = pd.json_normalize(dfpartidos)
p = dfdef[FIELDS]

### DATAFRAME OF THE TEAMS ###
# Keep only rows where both the home team and the away team match the sidebar choices.
home_matches = p['home_team.home_team_name'] == home_team
away_matches = p['away_team.away_team_name'] == away_team
l = p.loc[home_matches & away_matches, :]




# Table with every match of the selected season.
st.subheader(f'Datos de la  {file_to_charge2} :')
st.write(p)

# Table restricted to the selected home/away pairing, preceded by a caption line.
pairing_caption = ('Home Team:', home_team, 'VS', 'Away Team:', away_team)
st.write(*pairing_caption)
st.write(l)

这段代码运行良好,可以对数据进行出色的分析。但是我希望应用程序能够保持数据更新,这就是我需要从 GitHub 加载 JSON 文件的原因。我做了一些测试,但都没有成功,还遇到了一些问题。代码和错误信息如下。

import streamlit as st
import pandas as pd
import json
import os
from pandas import json_normalize
from flatten_json import flatten

import matplotlib.pyplot as plt
from matplotlib.patches import Arc
import numpy as np
from PIL import Image
import urllib, json
import plotly.express as px
import altair as atl
import re, json, requests

x = st.sidebar.header(' Analytics')  # heading shown at the top of the sidebar

# Season selector: each sidebar label maps to the JSON file name appended to the matches/11/ URL.
SEASON_FILES = {
    'Temporada 2018-2019': '4.json',
    'Temporada 2017-2018': '1.json',
    'Temporada 2016-2017': '2.json',
}
file_to_charge2 = st.sidebar.selectbox('temporada', tuple(SEASON_FILES))
# Fix: the 2017-2018 branch used to assign '1.json' to file_to_charge2 (the label)
# instead of file_to_charge, which left file_to_charge undefined for that season.
file_to_charge = SEASON_FILES[file_to_charge2]


### Match search: pick the home and away teams ###

# Both sidebar selectors offer exactly the same team names, in the same order.
TEAMS = (
    'Athletic Bilbao', 'Atlético Madrid', 'Barcelona', 'Celta Vigo', 'Deportivo Alavés', 'Deportivo La Coruna', 'Eibar',
    'Espanyol', 'Getafe', 'Girona', 'Granada', 'Las Palmas', 'Leganés', 'Levante', 'Málaga', 'Osasuna', 'Rayo Vallecano',
    'Real Betis', 'Real Madrid', 'Real Sociedad', 'Real Valladolid', 'Sevilla', 'Sporting Gijón', 'Valencia', 'Villarreal',
)
home_team = st.sidebar.selectbox('Equipo juega en casa:', TEAMS)
away_team = st.sidebar.selectbox('Equipo fuera de casa:', TEAMS)
# Leftover experiment, not executed: json_normalize(my_data3, sep='_').assign(match_id=file_name[:-5])




### Attempt to load the season's matches JSON from GitHub -- this is the code that fails in the question ###
url = 'https://raw.github.com/statsbomb/open-data/matches/11/' + file_to_charge  # NOTE(review): no branch or 'data/' segment in this path -- compare with the URL given in the answer
resp = requests.get(url)  # the response status is not checked before the body is parsed
st.title('> Datos por temporada:')
dfpartidos = json.loads(resp.text)  # raises JSONDecodeError when the body is not valid JSON (see the traceback below)



### DATAFRAME OF THE SEASON ###
# Columns kept from the flattened match records; the dotted names are the
# column labels pd.json_normalize produces for nested keys.
FIELDS = [
    'match_id',
    'match_week',
    "home_team.home_team_name",
    'away_team.away_team_name',
    'home_score',
    'away_score',
    'referee.name',
]
dfdef = pd.json_normalize(dfpartidos)
p = dfdef[FIELDS]

# Dataframe filtered by the selected teams: keep only rows where both the
# home team and the away team match the sidebar choices.
home_matches = p['home_team.home_team_name'] == home_team
away_matches = p['away_team.away_team_name'] == away_team
l = p.loc[home_matches & away_matches, :]

# Table with every match of the selected season.
st.subheader(f'Datos de la  {file_to_charge2} :')
st.write(p)

# Table restricted to the selected home/away pairing, preceded by a caption line.
pairing_caption = ('Home Team:', home_team, 'VS', 'Away Team:', away_team)
st.write(*pairing_caption)
st.write(l)

从 GitHub 加载 JSON 数据时出现以下错误:

JSONDecodeError: Extra data: line 1 column 4 (char 3)
Traceback:
File "/Users/DOMIN2662/Desktop/streamlit/venv/lib/python3.7/site-packages/streamlit/script_runner.py", line 324, in _run_script
    exec(code, module.__dict__)
File "/Users/DOMIN2662/Desktop/streamlit/duda22.py", line 50, in <module>
    dfpartidos = json.loads(resp.text)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/__init__.py", line 348, in loads
    return _default_decoder.decode(s)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/decoder.py", line 340, in decode
    raise JSONDecodeError("Extra data", s, end)

我想特别感谢社区的支持,希望将来我也能帮助其他开发者。

您使用的 URL 是错误的,它会返回 404 响应,而不是有效的 JSON。json.loads 无法解析该响应内容,因此引发 JSONDecodeError。

请将 url 变量的赋值改为:

# Unlike the failing URL in the question, this one uses the raw.githubusercontent.com
# host and includes the "master" and "data/" path segments before "matches/11/".
MATCHES_BASE_URL = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/matches/11/"
url = MATCHES_BASE_URL + file_to_charge