Code Corona

نسخه‌ی كامل: پیمایش گراف ها به وسیله ی Q-Learning
شما هم اكنون متن قالب بندی نشده را می‌بینید.مشاهده‌ی نسخه‌ی اصلی
سلام
این برنامه پیاده سازیه روش Q-Learning هست که برای یافتن یک هدف در مکان نا معلوم به وسیله ی یک agent نوشته شده.
در این برنامه ابتدا به وسیله ی این الگوریتم و چندین بار حرکت agent در مسیر ، هزینه هایی برای این مسیر پیدا میشه و در پایان هم بهترین مسیر تا هدف رو انتخاب میکنیم ( من در این برنامه از الگوریتم Dijkstra برای این کار استفاده کردم )
به این نکته هم توجه داشت باشین که بهترین مسیر در این روش ، مسیری است که بزرگترین هرینه رو در هر مرحله انتخاب کنه ( همون روش greedy خودمونه )
کد:
//Q-Learning
//Routing graph
//written by soheil setayeshi
#include<iostream>
#include<time.h>
#include<vector>
using namespace std;

const int SIZE=6;
//*********************
struct node
{
       int destination;
       int s;
       float distance;
       string p;
};
//*********************
int max_distance(node []);
void dijkstra ( int ,int, float [][6] );
//*********************

class Q_Learning
{
       public:
              Q_Learning(int,int);
              void displayR();
              void displayQ();
              void routing();
       private:
               float Q_cal(int,int);
               float find_max(int);
               float Q[6][6];
               float R[6][6];
               int start_state;
               int goal_state;
               float Y;
               // state shoroo ham bayad bashe baadan
};
//*********************
Q_Learning :: Q_Learning(int s , int g)
{
           int i,j,k,l,state,action;
           float temp;
           vector<int> states;
           Y=0.8;
           start_state=s;
           goal_state=g;
           for ( i=0 ; i<SIZE ; i++ )
               for (j=0 ; j<SIZE ; j++ )
               {
                   Q[i][j]=0;
                   R[i][j]=-1;
               }
           R[0][4]=0;R[1][3]=0;R[1][5]=0;
           R[2][3]=0;R[3][1]=0;R[3][2]=0;
           R[3][4]=0;R[4][0]=0;R[4][3]=0;
           R[4][5]=0;R[5][1]=0;R[5][4]=0;
           R[5][5]=0;
          
           for (i=0 ; i<SIZE ; i++)
               if ( R[i][goal_state] == 0 )
                  R[i][goal_state] = 100;
          
           srand(time(0));
          
           //namayeshe R
           displayR();
          
           //shoroo e episode ha
           for (k=0 ; k<1200 ; k++)
           {
               state=rand()%SIZE;
               //peida kardane state haye havi meghdar
               for (i=0 ; i<SIZE ; i++)
               {
                   if (R[state][i] != -1)
                   states.push_back(i);
               }
               //peida kardane action
               action = states [ rand() % states.size() ];
               Q [state][action] = Q_cal ( state , action );
               states.clear();
               state = action;
               if ( !(k%10) )
                  displayQ();
               //shoroo e tekrar ha ta mogheye residan be hadaf
               while (state != goal_state)
               {
                     for (i=0 ; i<SIZE ; i++)
                     {
                         if (R[state][i] != -1)
                         states.push_back(i);
                     }
                     action = states [ rand() % states.size() ];
                     Q [state][action] = Q_cal ( state , action );
                     states.clear();
                     state = action;
               }
           }
           system("pause");
           //normal sazi Q
           temp=Q[0][0];
           for (i=0 ; i<SIZE ; i++)
               for (j=0 ; j<SIZE ; j++)
                   if (temp < Q[i][j])
                      temp=Q[i][j];
           for (i=0 ; i<SIZE ; i++)
               for (j=0 ; j<SIZE ; j++)
                   Q[i][j]=Q[i][j]/temp*100;
                              
           displayQ();
           routing();
}
//*********************
float Q_Learning :: Q_cal(int a , int b)//mohasebeye formool Q-Learning
{
    return R[a][b] + Y * find_max( b );
}
//*********************
float Q_Learning :: find_max ( int b )//yaftane max state
{
      int i;
      vector<int> states;
      for (i=0 ; i<SIZE ; i++)
      {
          if (R[b][i] != -1)
          states.push_back(i);
      }
      if ( ! states.size() ) return 0;
      float max = Q[b][ states[0] ];
      for (i=1 ; i<states.size() ; i++)
          if ( max < Q[b][ states[i] ] )
             max=Q[b][ states[i] ];
      return max;
}
//*********************
void Q_Learning :: routing()
{
     int i,j;
     float temp[6][6];
     for (i=0 ; i<SIZE ; i++)
         for (j=0 ; j<SIZE ; j++)
         {
             temp[i][j]=Q[i][j];
             if (temp[i][j] == 0 && i!=j)
                temp[i][j]=std::numeric_limits<float>::min(); // MIN_FLOAT
         }
     dijkstra ( start_state ,goal_state,  temp );
}
//*********************

void Q_Learning :: displayR()//namayeshe maghadire R
{
     int i,j;
     char item[6]={'A','B','C','D','E','F'};
     system("cls");
     cout<<"\n -1 --> No way\n 0 --> Is way\n 100 --> Direct way\n";
     cout<<"\nRoads matrix = :\n\n  ";
     for (i=0 ; i<6 ; i++)
         cout<<item[i]<<"  ";
     cout<<endl;
     for (i=0 ; i<SIZE ; i++)
     {
         cout<<item[i]<<' ';
         for (j=0 ; j<SIZE ; j++)
             cout<<R[i][j]<<' ';
         cout<<endl;
     }
     system("pause");
}
//*********************
void Q_Learning :: displayQ()//namayeshe maghadire Q
{
     int i,j;
     char item[6]={'A','B','C','D','E','F'};
     system("cls");
    
     cout<<"\nQ = :\n\n  ";
     for (i=0 ; i<SIZE ; i++)
         cout<<item[i]<<' ';
     cout<<endl;
     for (i=0 ; i<SIZE ; i++)
     {
         cout<<item[i]<<' ';
         for (j=0 ; j<SIZE ; j++)
             cout<<Q[i][j]<<' ';
         cout<<endl;
     }
     cout<<endl;  
}
//*********************
int main()
{
    int i,j,s,g;
    cout<<"\nA:0  B:1  C:2  D:3  E:4  F:5\n";
    while (1)
    {
          cout<<"\nEnter Start state: ";
          cin>>s;
          if (s<0 || s> SIZE-1)
          {
             cout<<'\a';
             system("cls");
             continue;
          }
          else break;
    }
    while (1)
    {
          cout<<"\nEnter Goal state: ";
          cin>>g;
          if (g<0 || g> SIZE-1)
          {
             cout<<'\a';
             system("cls");
             continue;
          }
          else break;
    }
    Q_Learning ob(s,g);
    return 0;
}
//*********************
int max_distance(node a[6])
{
    int max_index,i;
    float max=numeric_limits<float>::min();
    for (i=0 ; i<SIZE ; i++)
    {
        if (a[i].distance > max && a[i].s == 0)
        {
           max = a[i].distance;
           max_index = i;
        }
    }
    return max_index;
}
//*********************
void dijkstra ( int v ,int g, float P[6][6] )
{
     int u,i,j,k;
     float NP[6][6];
     char ch[6]={'A','B','C','D','E','F'};
     node state[6];
    
     for (i=0 ; i< SIZE ; i++)
         for (j=0 ; j< SIZE ; j++)
         {
             NP[i][j]=P[i][j];
             if (NP[i][j] == 0 && i!=j)
                NP[i][j]=std::numeric_limits<float>::min(); // MIN_FLOAT
         }
    
     for (i=0 ; i< SIZE ; i++)
     {
         state[i].destination=i;
         state[i].s=0;
         state[i].distance=NP[v][i];
         state[i].p = v+'0';
     }
     state[v].s=1;
    
    
     for (j=1 ; j< SIZE ; j++)
     {
         u=max_distance(state);
         state[u].s=1;
         state[u].p += u+'0';
        
         for (k=0 ; k< SIZE ; k++)
             if (state[k].s == 0)
             {
                if ( NP[u][k] != std::numeric_limits<float>::min() )
                   if ( state[k].distance < state[u].distance + NP[u][k] )
                   {
                        state[k].distance = state[u].distance + NP[u][k];
                        state[k].p = state[u].p;
                   }
             }
            
     }
     /*for (i=0 ;i< SIZE ; i++)
     {
         cout<<"From "<<ch[v]<<" (start state) to ";
         cout<<ch[state[i].destination]<<'\t';
         //cout<<state[i].s<<'\t';
         cout<<"Distance: ";
         cout<<(int)state[i].distance<<"\t\t";

         cout<<"Rout:";
         for (j=0 ; j<state[i].p.size(); j++)
             cout<<' '<<ch[ state[i].p[j]-48 ];
         cout<<endl;
     }*/
     cout<<"\nStart state to Goal state:\n\n";
     cout<<"From "<<ch[v]<<"(Start state) to "<<ch[g]<<"(Goal state):\n\n";
     cout<<"Cost: "<<(int)state[g].distance;
     cout<<"\tRoute: ";
    
     for (j=0 ; j<state[g].p.size(); j++)
         cout<<' '<<ch[ state[g].p[j]-48 ]<<' ';
     cout<<"\n\n";
    
    
     system("pause");
}
آدرس اصلی