// QLearningAlgorithm.cpp
// A minimal tabular Q-learning demo on a 6-state graph (states A-F, goal F).
// Team Members 👩🏻💻👩🏻💻
// - Sarah Abu irmeileh
// - Asia Shalaldah
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iostream>
using namespace std;
// Dimensions of the reward and Q-table matrices: 6 states (rows, named A-F)
// by 6 actions (columns); taking action j means moving to state j.
const int rows = 6;
const int columns = 6;
// Forward declarations for the helpers defined below main()
void printArray(int array[rows][columns]);
void initializeRewardArray(int Reward[rows][columns]);
int getRandomPossibleAction(int state, int Reward[rows][columns]);
void QLearningAlgorithm(int Reward[rows][columns], int QTable[rows][columns], double y, int episodes);
// Entry point: build the reward matrix, train the Q-table with Q-learning,
// and print both matrices to stdout.
int main (){
    // Reward matrix: row = current state, column = action (the action index
    // is also the state the agent lands in). Goal state is F (index 5);
    // adding 'A' to an index yields the state's letter name from the Readme.
    int Reward[rows][columns];
    initializeRewardArray(Reward);

    cout << "The Reward array is : " << endl << endl;
    printArray(Reward);
    cout << endl << endl;

    // Q-table: every entry starts at -1.
    int QTable[rows][columns];
    for (int r = 0; r < rows; ++r) {
        for (int c = 0; c < columns; ++c) {
            QTable[r][c] = -1;
        }
    }

    int episode = 1000;   // number of training episodes; usually large
    double y = 0.8;       // discount factor for future rewards

    // Seed the RNG so each run explores differently.
    srand(time(0));

    QLearningAlgorithm(Reward, QTable, y, episode);

    cout << "The Q table after " << episode << " episodes is : " << endl << endl;
    printArray(QTable);
    cout << endl;
    return 0;
}
// Print a rows x columns integer matrix to stdout, one matrix row per line.
void printArray(int array[rows][columns]){
    for (int r = 0; r < rows; ++r){
        for (int c = 0; c < columns; ++c){
            cout << array[r][c] << " ";
        }
        cout << endl;
    }
}
// Fill the reward matrix from the state graph in the ReadMe:
//   -1  -> the move (state, action) is not possible
//    0  -> the move is possible but does not reach the goal
//   100 -> the move lands directly on the goal state F (index 5)
void initializeRewardArray(int Reward[rows][columns]){
    // Start from "no possible action" everywhere.
    for (int s = 0; s < rows; ++s){
        for (int a = 0; a < columns; ++a){
            Reward[s][a] = -1;
        }
    }
    // Edges that are allowed but do not arrive at the goal.
    const int open[][2] = {
        {0, 4}, {1, 3}, {2, 3}, {3, 1}, {3, 2},
        {3, 4}, {4, 0}, {4, 3}, {5, 1}, {5, 4}
    };
    for (const auto& edge : open){
        Reward[edge[0]][edge[1]] = 0;
    }
    // Edges that arrive directly at the goal state F get the big reward.
    Reward[1][5] = 100;
    Reward[4][5] = 100;
    Reward[5][5] = 100;
}
// Train the Q-table for the given number of episodes.
// Each episode starts in a random state and repeatedly takes a random
// possible action until the goal state (5) has just been entered. Because
// taking action a moves the agent to state a, the update rule is
//   Q(s, a) = R(s, a) + y * max_a' Q(a, a')
// with no learning-rate blending (the old Q value is overwritten).
void QLearningAlgorithm(int Reward[rows][columns], int QTable[rows][columns], double y, int episodes){
    while(episodes--){
        // Random starting state for this episode.
        int state = rand() % rows;
        bool reachedGoal = false;
        while (!reachedGoal){
            // Pick a random possible action; its index is also the next state.
            int action = getRandomPossibleAction(state, Reward);
            // Best Q-value obtainable from the next state's row.
            int bestNext = -1;
            for (int a = 0; a < columns; ++a){
                if (QTable[action][a] > bestNext){
                    bestNext = QTable[action][a];
                }
            }
            // Bellman-style update for the move just taken.
            QTable[state][action] = Reward[state][action] + y * bestNext;
            // The episode ends once the goal state has been entered.
            reachedGoal = (action == 5);
            state = action;
        }
    }
}
// Rejection-sample a uniformly random action that is possible from `state`,
// i.e. whose Reward entry is not -1, and return its column index.
// NOTE(review): spins forever if a state has no possible action; every state
// produced by initializeRewardArray has at least one.
int getRandomPossibleAction(int state, int Reward[rows][columns]){
    for (;;){
        int candidate = rand() % columns;
        if (Reward[state][candidate] != -1){
            return candidate;
        }
    }
}