// QLearningAlgorithm.cpp
// A minimal tabular Q-learning demo on a 6-state graph (states A-F, goal F).
// Team Members 👩🏻💻👩🏻💻
// - Sarah Abu irmeileh
// - Asia Shalaldah
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iostream>
using namespace std;
// Dimensions of the reward and Q-table matrices: 6 states (rows, named A-F)
// by 6 actions (columns); taking action j means moving to state j.
const int rows = 6;
const int columns = 6;
// Forward declarations for the helpers defined below main()
void printArray(int array[rows][columns]);
void initializeRewardArray(int Reward[rows][columns]);
int getRandomPossibleAction(int state, int Reward[rows][columns]);
void QLearningAlgorithm(int Reward[rows][columns], int QTable[rows][columns], double y, int episodes);
// Entry point: build the reward matrix, train the Q-table with Q-learning,
// and print both matrices to stdout.
int main (){
    // Reward matrix: row = current state, column = action (the action index
    // is also the state the agent lands in). Goal state is F (index 5);
    // adding 'A' to an index yields the state's letter name from the Readme.
    int Reward[rows][columns];
    initializeRewardArray(Reward);

    cout << "The Reward array is : " << endl << endl;
    printArray(Reward);
    cout << endl << endl;

    // Q-table: every entry starts at -1.
    int QTable[rows][columns];
    for (int r = 0; r < rows; ++r) {
        for (int c = 0; c < columns; ++c) {
            QTable[r][c] = -1;
        }
    }

    int episode = 1000;   // number of training episodes; usually large
    double y = 0.8;       // discount factor for future rewards

    // Seed the RNG so each run explores differently.
    srand(time(0));

    QLearningAlgorithm(Reward, QTable, y, episode);

    cout << "The Q table after " << episode << " episodes is : " << endl << endl;
    printArray(QTable);
    cout << endl;
    return 0;
}
// Print a rows x columns integer matrix to stdout, one matrix row per line.
void printArray(int array[rows][columns]){
    for (int r = 0; r < rows; ++r){
        for (int c = 0; c < columns; ++c){
            cout << array[r][c] << " ";
        }
        cout << endl;
    }
}
// Fill the reward matrix from the state graph in the ReadMe:
//   -1  -> the move (state, action) is not possible
//    0  -> the move is possible but does not reach the goal
//   100 -> the move lands directly on the goal state F (index 5)
void initializeRewardArray(int Reward[rows][columns]){
    // Start from "no possible action" everywhere.
    for (int s = 0; s < rows; ++s){
        for (int a = 0; a < columns; ++a){
            Reward[s][a] = -1;
        }
    }
    // Edges that are allowed but do not arrive at the goal.
    const int open[][2] = {
        {0, 4}, {1, 3}, {2, 3}, {3, 1}, {3, 2},
        {3, 4}, {4, 0}, {4, 3}, {5, 1}, {5, 4}
    };
    for (const auto& edge : open){
        Reward[edge[0]][edge[1]] = 0;
    }
    // Edges that arrive directly at the goal state F get the big reward.
    Reward[1][5] = 100;
    Reward[4][5] = 100;
    Reward[5][5] = 100;
}
// Train the Q-table for the given number of episodes.
// Each episode starts in a random state and repeatedly takes a random
// possible action until the goal state (5) has just been entered. Because
// taking action a moves the agent to state a, the update rule is
//   Q(s, a) = R(s, a) + y * max_a' Q(a, a')
// with no learning-rate blending (the old Q value is overwritten).
void QLearningAlgorithm(int Reward[rows][columns], int QTable[rows][columns], double y, int episodes){
    while(episodes--){
        // Random starting state for this episode.
        int state = rand() % rows;
        bool reachedGoal = false;
        while (!reachedGoal){
            // Pick a random possible action; its index is also the next state.
            int action = getRandomPossibleAction(state, Reward);
            // Best Q-value obtainable from the next state's row.
            int bestNext = -1;
            for (int a = 0; a < columns; ++a){
                if (QTable[action][a] > bestNext){
                    bestNext = QTable[action][a];
                }
            }
            // Bellman-style update for the move just taken.
            QTable[state][action] = Reward[state][action] + y * bestNext;
            // The episode ends once the goal state has been entered.
            reachedGoal = (action == 5);
            state = action;
        }
    }
}
// Rejection-sample a uniformly random action that is possible from `state`,
// i.e. whose Reward entry is not -1, and return its column index.
// NOTE(review): spins forever if a state has no possible action; every state
// produced by initializeRewardArray has at least one.
int getRandomPossibleAction(int state, int Reward[rows][columns]){
    for (;;){
        int candidate = rand() % columns;
        if (Reward[state][candidate] != -1){
            return candidate;
        }
    }
}