我尝试为囚徒困境中的学习算法创建相同类型的不同对象,但 python 混淆了它们
I try to create different objects of the same type for Learning Algorithms in Prisoner's Dilemma but python confuses them
显然,我想不是 Python 让他们感到困惑。我只是想不出我的错误的来源。
这是我的代码:
import numpy as np
class EW_learner:
#p is the probability distribution (from which we chose action C or D)
#pp are the potential payoffs (if 1 action was taken consistently)
#s is my real payoff or score (i.e. negated total prison years)
##pp[0] and action = 0 is cooperate (C) => (0=C)
##pp[1] and action = 1 is defect (D) => (1=D)
#ps is a list of the probability distribution at each round
#ss is a list of the scores at each round
def __init__(self, lr, p=[0.5,0.5], pp=[0,0], s=0, ps=[], ss=[]):
self.lr = lr
self.p = p
self.pp = pp
self.s = s
self.ps = ps
self.ss = ss
#Return an action (C or D) => (0 or 1)
def action(self):
return int(np.random.choice(2, 1, p=self.p))
def update(self, my_act, adv_act):
if (my_act == 0) and (adv_act == 0):
self.s -= 3
self.pp[0] -= 3
self.pp[1] -= 5
elif (my_act == 1) and (adv_act == 0):
self.s -= 5
self.pp[0] -= 3
self.pp[1] -= 5
elif (my_act == 0) and (adv_act == 1):
#self.s -= 0
#self.pp[0] -= 0
self.pp[1] -= 1
elif (my_act == 1) and (adv_act == 1):
self.s -= 1
#self.pp[0] -= 0
self.pp[1] -= 1
self.p[0] = np.power(1.0+self.lr, self.pp[0])
self.p[1] = 1 - self.p[0]
def collect_data(self):
(self.ps).append(self.p)
(self.ss).append(self.s)
def play(p1, p2, n_rounds):
for r in range(n_rounds):
act1 = p1.action()
act2 = p2.action()
p1.update(act1, act2)
p2.update(act2, act1)
p1.collect_data()
p2.collect_data()
print('P1 Score: ' + str(p1.s) + ', P2 Score: ' + str(p2.s))
print('P1 ProbDist: ' + str(p1.p) + ', P2 ProbDist: ' + str(p2.p))
return p1.ss, p2.ss, p1.ps, p2.ps
lucas = EW_learner(0.1)
paula = EW_learner(0.9)
sim = play(lucas, paula, 10)
当我调用 sim[0] 时,它基本上应该输出 p1.ss 这是卢卡斯在每一轮的得分,它输出这个:
in [85]: sim[0]
out[85]:
[-5,
0,
-6,
-1,
-6,
-6,
-7,
-7,
-8,
-8,
-9,
-9,
-10,
-10,
-11,
-11,
-12,
-12,
-13,
-13]
长度为 20(这没有意义,因为有 10 个回合)。此外,此输出似乎与 sim[1] 的输出完全相同,应该是 paula 在每一轮的得分。出于某种原因,他们合并到相同的列表中,交替使用卢卡斯的分数和保拉的分数。
我知道我可以使用 %2 轻松拆分列表,但我更愿意了解错误。
这样做:
import numpy as np
class EW_learner():
#p is the probability distribution (from which we chose action C or D)
#pp are the potential payoffs (if 1 action was taken consistently)
#s is my real payoff or score (i.e. negated total prison years)
##pp[0] and action = 0 is cooperate (C) => (0=C)
##pp[1] and action = 1 is defect (D) => (1=D)
#ps is a list of the probability distribution at each round
#ss is a list of the scores at each round
def __init__(self, lr, p=[0.5,0.5], pp=[0,0], s=0):
self.lr = lr
self.p = p
self.pp = pp
self.s = s
self.ps = []
self.ss = []
sim[0]
结果:
[0, -5, -6, -7, -8, -9, -10, -11, -12, -13]
显然,我想不是 Python 让他们感到困惑。我只是想不出我的错误的来源。
这是我的代码:
import numpy as np
class EW_learner:
#p is the probability distribution (from which we chose action C or D)
#pp are the potential payoffs (if 1 action was taken consistently)
#s is my real payoff or score (i.e. negated total prison years)
##pp[0] and action = 0 is cooperate (C) => (0=C)
##pp[1] and action = 1 is defect (D) => (1=D)
#ps is a list of the probability distribution at each round
#ss is a list of the scores at each round
def __init__(self, lr, p=[0.5,0.5], pp=[0,0], s=0, ps=[], ss=[]):
self.lr = lr
self.p = p
self.pp = pp
self.s = s
self.ps = ps
self.ss = ss
#Return an action (C or D) => (0 or 1)
def action(self):
return int(np.random.choice(2, 1, p=self.p))
def update(self, my_act, adv_act):
if (my_act == 0) and (adv_act == 0):
self.s -= 3
self.pp[0] -= 3
self.pp[1] -= 5
elif (my_act == 1) and (adv_act == 0):
self.s -= 5
self.pp[0] -= 3
self.pp[1] -= 5
elif (my_act == 0) and (adv_act == 1):
#self.s -= 0
#self.pp[0] -= 0
self.pp[1] -= 1
elif (my_act == 1) and (adv_act == 1):
self.s -= 1
#self.pp[0] -= 0
self.pp[1] -= 1
self.p[0] = np.power(1.0+self.lr, self.pp[0])
self.p[1] = 1 - self.p[0]
def collect_data(self):
(self.ps).append(self.p)
(self.ss).append(self.s)
def play(p1, p2, n_rounds):
for r in range(n_rounds):
act1 = p1.action()
act2 = p2.action()
p1.update(act1, act2)
p2.update(act2, act1)
p1.collect_data()
p2.collect_data()
print('P1 Score: ' + str(p1.s) + ', P2 Score: ' + str(p2.s))
print('P1 ProbDist: ' + str(p1.p) + ', P2 ProbDist: ' + str(p2.p))
return p1.ss, p2.ss, p1.ps, p2.ps
lucas = EW_learner(0.1)
paula = EW_learner(0.9)
sim = play(lucas, paula, 10)
当我调用 sim[0] 时,它基本上应该输出 p1.ss 这是卢卡斯在每一轮的得分,它输出这个:
in [85]: sim[0]
out[85]:
[-5,
0,
-6,
-1,
-6,
-6,
-7,
-7,
-8,
-8,
-9,
-9,
-10,
-10,
-11,
-11,
-12,
-12,
-13,
-13]
长度为 20(这没有意义,因为有 10 个回合)。此外,此输出似乎与 sim[1] 的输出完全相同,应该是 paula 在每一轮的得分。出于某种原因,他们合并到相同的列表中,交替使用卢卡斯的分数和保拉的分数。 我知道我可以使用 %2 轻松拆分列表,但我更愿意了解错误。
这样做:
import numpy as np
class EW_learner():
#p is the probability distribution (from which we chose action C or D)
#pp are the potential payoffs (if 1 action was taken consistently)
#s is my real payoff or score (i.e. negated total prison years)
##pp[0] and action = 0 is cooperate (C) => (0=C)
##pp[1] and action = 1 is defect (D) => (1=D)
#ps is a list of the probability distribution at each round
#ss is a list of the scores at each round
def __init__(self, lr, p=[0.5,0.5], pp=[0,0], s=0):
self.lr = lr
self.p = p
self.pp = pp
self.s = s
self.ps = []
self.ss = []
sim[0]
结果:
[0, -5, -6, -7, -8, -9, -10, -11, -12, -13]