从 Python pandas 数据集中删除特定数据
Remove specific data from a Python pandas dataset
我们编写此代码是为了绘制这样的电荷谱(来自特定 CH(另一列)的 HG(列)的组织):
该图是从数据集中获得的,其中有一列名为“Ts (ns)”:
CH LG HG Ts(ns) ToT(ns)
0 0.0 75.0 80.0 0.0 0.0
1 1.0 73.0 77.0 0.0 0.0
2 2.0 74.0 65.0 0.0 0.0
3 3.0 72.0 73.0 0.0 0.0
4 4.0 74.0 74.0 0.0 0.0
... ... ... ... ... ...
2613307 59.0 75.0 74.0 0.0 0.0
2613308 60.0 76.0 106.0 811.5 10.5
2613309 61.0 75.0 75.0 0.0 0.0
2613310 62.0 76.0 73.0 0.0 0.0
2613311 63.0 74.0 100.0 0.0 0.0
我们的目标是从图中仅删除“Ts”与“-”不同的数据。代码如下:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import re
import numpy as np
import os
from tqdm import tqdm, trange
print('''
_____ _ _ _ _
| __ \ | | | | (_) | |
| |__) | ___ __ _ __| | | | _ ___ | |_ _ __ _ _
| _ / / _ \ / _` | / _` | | | | |/ __|| __| | '_ \ | | | |
| | \ \| __/| (_| || (_| | | |____ | |\__ \| |_ _ | |_) || |_| |
|_| \_\___| \__,_| \__,_| |______||_||___/ \__|(_)| .__/ \__, |
| | __/ |
|_| |___/
''')
names = ['CH','LG','HG','Ts(ns)','ToT(ns)']
righe_primo_header = 5
righe_header = 5
canali = 64
# input file
infile = 'Run1_list_14_03_2022.txt'
# determinare numero di righe, poi di eventi nel file di input
stream = os.popen('wc -l '+ infile)
nrighe = stream.read()
match = re.match(r"\s*(\S+) (.*)", nrighe, re.I)
items = match.groups()
nrighe = float(items[0])
# numero di blocchi di dati da leggere
ntrigger = (nrighe - righe_primo_header) / (canali + righe_header) - 1
ntrigger = int( ntrigger)
print('\nNel file sono stati trovati ',ntrigger,' eventi.\n')
ncanali_histo = int(np.sqrt(ntrigger))
ncanali_histo = 4096
events = []
file1 = open( infile, 'r')
for line in range(righe_primo_header-1):
line = file1.readline()
line=file1.readline()
pbar = tqdm(total=ntrigger)
for trigger in range(ntrigger):
for lineh in range(righe_header):
line = file1.readline()
#print('saltiamo ',line)
for canale in range(canali):
line = file1.readline()
#print(' elaboriamo ',line)
match = re.match(r"(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+", line, re.I)
temparray = []
if match:
items = match.groups()
for colonna in range(len(items)):
col = items[colonna]
if col == '-':
valore = 0
else:
valore = float(items[colonna])
temparray.append(valore)
events.append(temparray)
pbar.update(1)
file1.close()
df = pd.DataFrame( events, columns = names)
print('\nIl dataframe totale è il seguente:\n', df)
# istogramma di HG per canale fissato
plot_df = df.loc[ (df['CH'] == 44) | (df['CH'] == 13)| (df['CH'] == 19) | (df['CH'] == 50) ]
print('\nIl dataframe selezionato è il seguente:\n', plot_df)
pd.options.mode.chained_assignment = None # default='warn' permette di sovrascrivere il dataframe.
plot_df['HG'][df['CH']==13] -= (99.2910)
plot_df['HG'][df['CH']==13] /= (33.1946) #effettua una prima calibrazione
plot_df['HG'][df['CH']==13] += (1)
plot_df['HG'][df['CH']==19] -= (99.2910)
plot_df['HG'][df['CH']==19] /= (34.2973) #effettua una prima calibrazione
plot_df['HG'][df['CH']==19] += (1)
plot_df['HG'][df['CH']==44] -= (99.0216)
plot_df['HG'][df['CH']==44] /= (32.8129) #effettua una prima calibrazione
plot_df['HG'][df['CH']==44] += (1)
plot_df['HG'][df['CH']==50] -= (98.1520)
plot_df['HG'][df['CH']==50] /= (34.1027) #effettua una prima calibrazione
plot_df['HG'][df['CH']==50] += (1)
layout = go.Layout(
title="Equalized HG Multiphoton Spectrum",
xaxis=dict(
title="#p.e."
),
yaxis=dict(
title="Counts"
) )
fig3 = go.Figure(layout=layout)
fig3.add_trace(go.Histogram(x=plot_df['HG'][df['CH']==13],name='CH 13'))
fig3.update_traces(xbins=dict(start=0, end=10, size=1/33.1946)) #gain canale 13
fig3.add_trace(go.Histogram(x=plot_df['HG'][df['CH']==19],name='CH 19'))
fig3.update_traces(xbins=dict(start=0, end=10, size=1/34.2973)) #gain canale 19
fig3.add_trace(go.Histogram(x=plot_df['HG'][df['CH']==44],name='CH 44'))
fig3.update_traces(xbins=dict(start=0, end=10, size=1/32.8129)) #gain canale 44
fig3.add_trace(go.Histogram(x=plot_df['HG'][df['CH']==50],name='CH 50'))
fig3.update_traces(xbins=dict(start=0, end=10, size=1/34.1027)) #gain canale 50
fig3.update_xaxes(showline=True, linewidth=1, linecolor='black', gridcolor='black')
fig3.update_yaxes(showline=True, linewidth=1, linecolor='black', gridcolor='black')
fig3.update_layout(barmode='stack') #per il cumulativo
#fig3.update_layout(barmode='overlay') #per i plot singoli
fig3.update_layout({
'plot_bgcolor': 'rgba(0, 0, 0, 0)',
'paper_bgcolor': 'rgba(0, 0, 0, 0)',
})
fig3.show()
"""
evento = 3
plot_df2 = df[(canali * evento):(canali*evento + canali)]
print('Il dataframe per lo scatter plot HG vs Ch relativo all evento ',evento, 'è il seguente: ', plot_df2)
fig2 = px.scatter(plot_df2, x='CH', y='HG', title='HG vs CH event ' + str(evento) )
fig2.show()
"""
同时文件上传到这里:https://mega.nz/file/E1oEwLjD#PQcIiRrSDJD9LT4enrN1bQoWLmKpUTzBZLWVylVn_7g
我们如何做到这一点?提前致谢。
您可以根据布尔条件删除它们。在您的情况下,这应该有效:
df = df[df['Ts(ns)'] != '-'] # or change the '-' with the value you want filter on
你可以看看this作为参考。
我们编写此代码是为了绘制这样的电荷谱(来自特定 CH(另一列)的 HG(列)的组织):
该图是从数据集中获得的,其中有一列名为“Ts (ns)”:
CH LG HG Ts(ns) ToT(ns)
0 0.0 75.0 80.0 0.0 0.0
1 1.0 73.0 77.0 0.0 0.0
2 2.0 74.0 65.0 0.0 0.0
3 3.0 72.0 73.0 0.0 0.0
4 4.0 74.0 74.0 0.0 0.0
... ... ... ... ... ...
2613307 59.0 75.0 74.0 0.0 0.0
2613308 60.0 76.0 106.0 811.5 10.5
2613309 61.0 75.0 75.0 0.0 0.0
2613310 62.0 76.0 73.0 0.0 0.0
2613311 63.0 74.0 100.0 0.0 0.0
我们的目标是从图中仅删除“Ts”与“-”不同的数据。代码如下:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import re
import numpy as np
import os
from tqdm import tqdm, trange
print('''
_____ _ _ _ _
| __ \ | | | | (_) | |
| |__) | ___ __ _ __| | | | _ ___ | |_ _ __ _ _
| _ / / _ \ / _` | / _` | | | | |/ __|| __| | '_ \ | | | |
| | \ \| __/| (_| || (_| | | |____ | |\__ \| |_ _ | |_) || |_| |
|_| \_\___| \__,_| \__,_| |______||_||___/ \__|(_)| .__/ \__, |
| | __/ |
|_| |___/
''')
names = ['CH','LG','HG','Ts(ns)','ToT(ns)']
righe_primo_header = 5
righe_header = 5
canali = 64
# input file
infile = 'Run1_list_14_03_2022.txt'
# determinare numero di righe, poi di eventi nel file di input
stream = os.popen('wc -l '+ infile)
nrighe = stream.read()
match = re.match(r"\s*(\S+) (.*)", nrighe, re.I)
items = match.groups()
nrighe = float(items[0])
# numero di blocchi di dati da leggere
ntrigger = (nrighe - righe_primo_header) / (canali + righe_header) - 1
ntrigger = int( ntrigger)
print('\nNel file sono stati trovati ',ntrigger,' eventi.\n')
ncanali_histo = int(np.sqrt(ntrigger))
ncanali_histo = 4096
events = []
file1 = open( infile, 'r')
for line in range(righe_primo_header-1):
line = file1.readline()
line=file1.readline()
pbar = tqdm(total=ntrigger)
for trigger in range(ntrigger):
for lineh in range(righe_header):
line = file1.readline()
#print('saltiamo ',line)
for canale in range(canali):
line = file1.readline()
#print(' elaboriamo ',line)
match = re.match(r"(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+", line, re.I)
temparray = []
if match:
items = match.groups()
for colonna in range(len(items)):
col = items[colonna]
if col == '-':
valore = 0
else:
valore = float(items[colonna])
temparray.append(valore)
events.append(temparray)
pbar.update(1)
file1.close()
df = pd.DataFrame( events, columns = names)
print('\nIl dataframe totale è il seguente:\n', df)
# istogramma di HG per canale fissato
plot_df = df.loc[ (df['CH'] == 44) | (df['CH'] == 13)| (df['CH'] == 19) | (df['CH'] == 50) ]
print('\nIl dataframe selezionato è il seguente:\n', plot_df)
pd.options.mode.chained_assignment = None # default='warn' permette di sovrascrivere il dataframe.
plot_df['HG'][df['CH']==13] -= (99.2910)
plot_df['HG'][df['CH']==13] /= (33.1946) #effettua una prima calibrazione
plot_df['HG'][df['CH']==13] += (1)
plot_df['HG'][df['CH']==19] -= (99.2910)
plot_df['HG'][df['CH']==19] /= (34.2973) #effettua una prima calibrazione
plot_df['HG'][df['CH']==19] += (1)
plot_df['HG'][df['CH']==44] -= (99.0216)
plot_df['HG'][df['CH']==44] /= (32.8129) #effettua una prima calibrazione
plot_df['HG'][df['CH']==44] += (1)
plot_df['HG'][df['CH']==50] -= (98.1520)
plot_df['HG'][df['CH']==50] /= (34.1027) #effettua una prima calibrazione
plot_df['HG'][df['CH']==50] += (1)
layout = go.Layout(
title="Equalized HG Multiphoton Spectrum",
xaxis=dict(
title="#p.e."
),
yaxis=dict(
title="Counts"
) )
fig3 = go.Figure(layout=layout)
fig3.add_trace(go.Histogram(x=plot_df['HG'][df['CH']==13],name='CH 13'))
fig3.update_traces(xbins=dict(start=0, end=10, size=1/33.1946)) #gain canale 13
fig3.add_trace(go.Histogram(x=plot_df['HG'][df['CH']==19],name='CH 19'))
fig3.update_traces(xbins=dict(start=0, end=10, size=1/34.2973)) #gain canale 19
fig3.add_trace(go.Histogram(x=plot_df['HG'][df['CH']==44],name='CH 44'))
fig3.update_traces(xbins=dict(start=0, end=10, size=1/32.8129)) #gain canale 44
fig3.add_trace(go.Histogram(x=plot_df['HG'][df['CH']==50],name='CH 50'))
fig3.update_traces(xbins=dict(start=0, end=10, size=1/34.1027)) #gain canale 50
fig3.update_xaxes(showline=True, linewidth=1, linecolor='black', gridcolor='black')
fig3.update_yaxes(showline=True, linewidth=1, linecolor='black', gridcolor='black')
fig3.update_layout(barmode='stack') #per il cumulativo
#fig3.update_layout(barmode='overlay') #per i plot singoli
fig3.update_layout({
'plot_bgcolor': 'rgba(0, 0, 0, 0)',
'paper_bgcolor': 'rgba(0, 0, 0, 0)',
})
fig3.show()
"""
evento = 3
plot_df2 = df[(canali * evento):(canali*evento + canali)]
print('Il dataframe per lo scatter plot HG vs Ch relativo all evento ',evento, 'è il seguente: ', plot_df2)
fig2 = px.scatter(plot_df2, x='CH', y='HG', title='HG vs CH event ' + str(evento) )
fig2.show()
"""
同时文件上传到这里:https://mega.nz/file/E1oEwLjD#PQcIiRrSDJD9LT4enrN1bQoWLmKpUTzBZLWVylVn_7g
我们如何做到这一点?提前致谢。
您可以根据布尔条件删除它们。在您的情况下,这应该有效:
df = df[df['Ts(ns)'] != '-'] # or change the '-' with the value you want filter on
你可以看看this作为参考。