def reverse_state(state): # reverse the state
return np.array([state[1, :],state[0, :]])
def env(Q, epsilon, eta, gamma, pi):
state = np.array([[1, 1], [1, 1]]) # 초기 상태 first state
turn = 0 # 턴 수 turn number
action = get_action(state, Q, epsilon, pi) # 초기 행동 first action
while (1):
turn += 1 # 턴 수
# 다음 단계 state 구하기 get nextstate with action
state_next = get_next_state(state, action)
# 보상 부여 후 다음 행동 계산 give reward and get next action
if (state[1, :] == [0, 0]).all(): # 이긴 경우 # is win
reward = 1
action = np.nan
else:
reward = 0
action = get_action(state_next, Q, epsilon, pi)
# 가치함수 수정 edit Q function
Q = Q_learning(state, action, reward, state_next, Q, eta, gamma)
# 종료 여부 판정 is done
if (state[0, :] == [0, 0]).all() or (state[1, :] == [0, 0]).all():
break
else:
state = state_next
return [turn, Q]
When the number of turn is an even number(when it is the opponent player's viewpoint), I want to reverse the state and then complete the calculations, and then reverst one more time to return to the original viewpoint before calculating on the next turn. I have no idea what to do here.
I tried it
def env(Q, epsilon, eta, gamma, pi): # 환경
state = np.array([[1, 1], [1, 1]]) # 초기 상태
turn = 0 # 턴 수
action = get_action(state, Q, epsilon, pi) # 초기 행동
while (1):
turn += 1 # 턴 수
if turn % 2 == 0:
# 다음 단계 state 구하기
state = reverse_state(state)
state_next = get_next_state(state, action)
# 보상 부여 후 다음 행동 계산
if (state[1, :] == [0, 0]).all(): # 이긴 경우
reward = 1
action = np.nan
else:
reward = 0
action = get_action(state_next, Q, epsilon, pi)
# 가치함수 수정
Q = Q_learning(state, action, reward, state_next, Q, eta, gamma)
else:
# 다음 단계 state 구하기
state_next = get_next_state(state, action)
# 보상 부여 후 다음 행동 계산
if (state[1, :] == [0, 0]).all(): # 이긴 경우
reward = 1
action = np.nan
else:
reward = 0
action = get_action(state_next, Q, epsilon, pi)
# 가치함수 수정
Q = Q_learning(state, action, reward, state_next, Q, eta, gamma)
# 종료 여부 판정
if (state[0, :] == [0, 0]).all() or (state[1, :] == [0, 0]).all():
break
else:
if turn % 2 == 0:
state = reverse_state(state_next)
else:
state = state_next
print(state)
return [turn, Q]
like this but it doesn't work.