main:
  // AbortMethod(am_Error)
  scrsetmetrics ,,,,0,1
  //------------------------------------------------------------------
  // Change the following items to establish genetic traits
  // Reduce controls the maximum amount used for the delta weights
  //   Normally, this should not have to be altered
  // MaxTries controls the number of times the robot will try to learn
  //   when past memories do not produce some form of pleasure
  // Enforce controls how much (if any) reinforcement is given to past
  //   memories that work to produce pleasure
  // Per is the maximum % of the weights that will be changed during
  //   a learning activity
  Reduce = 1
  MaxTries = 5     // try up to 5 random actions for each sensory situation
  Enforce = 1.001  // increase relevant weights by 1/10 of 1%
  Per = 15
  // the following items control the physical properties of the network
  ns = 3           // number of sensor inputs
  nn = 40          // number of neurons in hidden layer
  nOut = 2         // number of outputs
  LearnLineFollow = True   // if False, the robot will follow walls
  //------------------------------------------------------------------
  gosub Init
  while True
    Good = False
    // is the robot learning lines or walls?
    if LearnLineFollow
      LS = rSense()
    else
      if rRange(-90)<47
        LS = True
      else
        LS = False
      endif
    endif
    if LS   // only try to learn if there is sensory input
      NumMoves++
      if NumMoves=50
        // after 50 moves, calculate the % of movements that were controlled by memory
        React = (100*NumReacts)/NumMoves
        xyString 400,10,"% Reacting = ",React,"  "
        NumMoves=0
        NumReacts=0
        gosub MaxMin   // display current Max/Min
      endif
      gosub ForwardProp
      gosub PerformActions
      gosub CheckGood
      if Good
        NumReacts++
        gosub ReenforceWts
      else
        // current knowledge is not working - so try to learn
        gosub ReverseActions   // prepare to try again
        for try=1 to MaxTries  // try up to MaxTries random operations
          // the more that is learned, the less it tries to learn
          NumTries++
          if NumTries=50
            // after 50 attempts to learn, calculate the % that were successful
            xyString 650,10,"% Good = ",(100*NumGood)/NumTries,"  "
            NumGood=0
            NumTries=0
            gosub MaxMin   // display current Max/Min
          endif
          gosub CreateDeltaWeights   // adds to current weights
          gosub ForwardProp
          gosub PerformActions
          gosub CheckGood
          if not Good
            gosub ForgetChanges
            gosub ReverseActions
          else
            NumGood++
            gosub ReenforceWts
            break   // keeps random changes that worked plus some
          endif
        next
      endif
    endif
    // nothing has worked so let genetic curiosity take over
    if not Good
      R = Random(100)
      if R>90 then rTurn -1*(Random(15)+5)
      if R<10 then rTurn Random(15)+5
      if not (rBumper()&4) then rForward 1
      gosub AvoidPain
    endif
  wend
end

Init:
  gosub Introduction
  ClearScr
  Good = False
  NumTries = 0    // number of times the robot tries to learn something new
  NumGood = 0     // number of times attempting to learn was successful
  NumMoves = 0    // total number of times the robot moves
  NumReacts = 0   // how many of the robot's movements were based on memories
  if not LearnLineFollow then ns = 2   // only two sensors for wall hugging
  // the following arrays hold values relating to the neural network:
  // wt 1st level, wt 2nd level, delta wts 1, delta wts 2, Inputs, Hidden Out, Outputs, Good states
  dim WtL1[ns,nn], WtL2[nn,nOut], DW1[ns,nn], DW2[nn,nOut], In[ns], H[nn], Out[nOut], GoodStates[50]
  dim Action[50]   // "good" action that triggered sensor state to be saved
  Action[0]=1      // first element keeps track of number of active elements in array
  Action[1]=2      // middle sensor ON
  LineWidth 3
  Rectangle 3,3,790,35
  Rectangle 3,35,790,588
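  // ---------------------------------------------------------------
  // Summary of what the network computes (see ForwardProp below):
  // each hidden neuron w sums its weighted sensor inputs and fires
  // only when the sum is positive, i.e.
  //    H[w] = 1  if  In[0]*WtL1[0,w] + ... + In[ns-1]*WtL1[ns-1,w] > 0
  // the two outputs are formed the same way from the hidden layer and
  // map directly to actions (Out[0] fires rTurn -1, Out[1] fires rTurn 1)
  // ---------------------------------------------------------------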
  // set all initial weights to zero
  for n=0 to ns-1        // each neuron in layer 1
    for w=0 to nn-1      // the weights for each neuron
      WtL1[n,w] = 0
      DW1[n,w] = 0
    next
  next
  for n=0 to nn-1        // each neuron in layer 2
    for w=0 to nOut-1    // the weights for each neuron
      WtL2[n,w] = 0
      DW2[n,w] = 0
    next
  next
  // create robot
  rLocate 460,340
  // and the environment
  rInvisible GREEN
  LineWidth 5
  SetColor GREEN
  line 150,150,200,160
  lineto 300,160
  lineto 400,150
  lineto 500,150
  lineto 600,140
  lineto 670,150
  line 250,310,310,330
  lineto 350,380
  lineto 400,390
  lineto 450,390
  lineto 500,400
  lineto 550,390
  line 420,300,500,230
  lineto 600,200
  lineto 700,160
  line 100,350,150,300
  lineto 250,250
  lineto 350,240
  line 65,100,90,150
  lineto 170,230
  line 300,490,400,500
  lineto 500,490
  lineto 600,500
  lineto 700,490
  line 65,160,70,250
  lineto 60,350
  lineto 70,450
  line 100,100,200,90
  lineto 300,100
  lineto 400,90
  lineto 500,100
  lineto 550,90
  lineto 650,110
  line 100,540,200,530
  lineto 300,540
  lineto 400,530
  lineto 500,540
  lineto 550,530
  lineto 650,540
  line 720,90,730,170
  lineto 720,250
  lineto 730,290
  lineto 730,350
  lineto 735,400
  lineto 720,460
  line 350,420,400,450
  lineto 450,450
  lineto 500,460
  Circle 530,250,680,450,BLACK,RED
  Circle 100,350,300,480,BLACK,RED
  Circle 300,200,410,280,BLACK,RED
  //Circle 570,50,700,110,BLACK,RED
return
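// ---------------------------------------------------------------
// A worked example of the genetic traits (values set in main):
// the network has ns*nn + nn*nOut = 3*40 + 40*2 = 200 weighted
// connections.  With Per = 15, each learning attempt perturbs
// roughly 15% of them (about 30 connections), each by a random
// amount of at most Reduce*(49999/500000), or about 0.1.  Each
// success multiplies the relevant weights by Enforce (1.001).
// ---------------------------------------------------------------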
Introduction:
  Print " A ROBOT THAT LEARNS ON ITS OWN"
  Print " by John Blankenship"
  Print " email: RobotBASIC@yahoo.com   webpage: RobotBASIC.org"
  Print
  Print "This program creates a robot whose primary movements are controlled by a neural network."
  Print "The robot will move around the screen and learn to follow the lines. Think of the lines"
  Print "as a food source. It gets pleasure from staying on the lines (eating)."
  Print
  Print "Generally the robot is always learning new things - which means it will also be"
  Print "forgetting some things. When this happens the robot will relearn things - given enough"
  Print "time, the robot might learn everything needed to follow the lines. Usually though, the"
  Print "robot will appear to get bored or lose concentration - but only momentarily as it"
  Print "learns things it has forgotten."
  Print
  Print "There are three things displayed at the top of the screen. On the left side are the Max"
  Print "and Min values for the weights controlling the neural network. In the center is the"
  Print "percentage of the time the robot is REACTING to internal memories. If the value is 90,"
  Print "then the robot is learning 10% of the time. A value of 100 does NOT mean the robot"
  Print "is perfect. It only means that of the last 50 moves (the displays are updated every 50"
  Print "moves) the robot has reacted to internal memories every time (perhaps because it has"
  Print "not encountered something it has not learned how to handle)."
  Print
  Print "The last item displayed (on the right side) is the percentage of time that the robot is"
  Print "successful in learning something new. It learns by simply randomizing the weights in the"
  Print "neural network. If this randomization produces a movement (turn left or right) that"
  Print "causes pleasure (following the line) then those weights are kept - otherwise they are"
  Print "discarded. You can alter the criteria used to determine if actions produce pleasure."
  Print
  Print "You can also alter many other factors such as the size of the neural network or the"
  Print "maximum percentage of weights that are randomly changed. The program is well commented"
  Print "to help you experiment with your own modifications. The details of how this program"
  Print "works were provided in an article in Servo Magazine."
  Print
  Input " PRESS ENTER TO CONTINUE",A
  ClearScr
  Print "In order to show that neural networks can be used for various types of learning,"
  Print "you can tell this robot if you want it to learn to follow lines or to hug walls."
  Print
  Print "Depending on the choice, the program is modified in two ways. First, the inputs"
  Print "to the neural network are different (the inputs either tell the robot where it is"
  Print "on the line, or where it is in relationship to the wall)."
  Print
  Print "The second change has to do with what the robot sees as positive feedback (pleasure)."
  Print "The robot 'feels' pleasure if any of the line sensors sees the line, or if the robot"
  Print "stays within a specified range from a wall (exterior walls are excluded)."
  Print
  Print "There is nothing in the program that tells the robot HOW to follow a line or hug a"
  Print "wall. It learns by trying random weights and keeping those that work and rejecting"
  Print "those that don't. Just like a newborn animal, the robot tries things and eventually"
  Print "learns what gives it pleasure."
  Print
  Print "Press L to make the robot learn to follow lines or W to make the robot learn to"
  Print "hug walls."
  Print
  Print " PRESS L or W TO CONTINUE"
  repeat
    GetKey k
  until k=0
  repeat
    GetKey k
    k=Char(k)
    k=Upper(k)
  until k="L" or k="W"
  if k="W" then LearnLineFollow = False
return

ForwardProp:
  // This routine calculates the outputs from the neural net
  // based on the current sensory inputs
  gosub SetInputs   // sets the values of the input array
  //====== calculate first layer (hidden layer) ========
  // zero out current values of hidden layer
  for i=0 to nn-1
    H[i]=0
  next
  // Sum up all the weighted inputs
  for i=0 to ns-1       // go through inputs
    for w=0 to nn-1     // weights
      H[w] += In[i]*WtL1[i,w]
    next
  next
  // now apply threshold
  for i=0 to nn-1
    if H[i]>0
      H[i]=1
    else
      H[i]=0
    endif
  next
  //====== calculate output values ==========
  // zero out current values
  for i=0 to nOut-1
    Out[i]=0
  next
  // Sum up all the weighted inputs
  for h=0 to nn-1       // go through hidden neurons
    for w=0 to nOut-1   // weights
      Out[w] += H[h]*WtL2[h,w]
    next
  next
  // now apply threshold
  for h=0 to nOut-1
    if Out[h]>0
      Out[h]=1
    else
      Out[h]=0
    endif
  next
  if Out[0] and Out[1]
    // cannot turn both left and right
    Out[0]=0
    Out[1]=0
  endif
return

PerformActions:
  // this routine performs the actions dictated by the output from the neural net
  LS=rSense()
  OldSensorState = LS   // save current sensory state
  if Out[0] then rTurn -1
  if Out[1] then rTurn 1
  if not (rBumper()&4) then rForward 1
  gosub AvoidPain
return

ReverseActions:
  // make the robot attempt to return to its original position
  // this is typically performed after a random try does not produce pleasure
  // this action is not really necessary, but allowing the robot to try
  // a number of random actions for the same situation greatly speeds up the
  // learning process
  LS = rSense()
  rForward -1   // back up
  if Out[0]
    // turn right till sensors change
    //while (rSense()=LS)
      rTurn 1
    //wend
  endif
  if Out[1]
    // turn left till sensors change
    //while (rSense()=LS)
      rTurn -1
    //wend
  endif
return
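// ---------------------------------------------------------------
// The learning cycle driven by the main loop, in brief:
//   1. ForwardProp turns the current sensor state into an action
//   2. PerformActions executes it and CheckGood tests for pleasure
//   3. on failure, CreateDeltaWeights nudges a random subset of the
//      weights, the action is retried, and ForgetChanges undoes the
//      nudge whenever it did not help
// Keeping only the changes that produced pleasure is what slowly
// shapes the weights toward line following (or wall hugging).
// ---------------------------------------------------------------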
AvoidPain:
  // This is a low-level genetic response that prevents the robot from
  // causing errors from colliding with objects. It is similar to an
  // animal's automatic response to pain (such as pulling your hand away
  // from a fire or needle).
  BS = rBumper()
  if BS   // pain
    if random(10)<5
      while rBumper()!=1
        rTurn -1
      wend
    else
      while rBumper()!=1
        rTurn 1
      wend
    endif
  endif
  rGPS xx,yy
  if xx<45 or xx>755 or yy<80 or yy>553
    rTurn 40
    for ii=1 to 40
      if not rBumper() then rForward 1
    next
  endif
  while rFeel()&12
    rTurn 1
  wend
return

CreateDeltaWeights:
  // this routine changes some percentage of the nnet weights by a tiny amount
  // first create a set of delta weights in the DW1 and DW2 arrays
  for n=0 to ns-1        // each neuron in layer 1
    for w=0 to nn-1      // the weights for each neuron
      A = Reduce*(random(50000)/500000.0)   // amount to change each weight
      if random(100)<Per          // only change Per% of the weights
        if random(100)<50 then A = -A       // half the changes are negative
        DW1[n,w] = A
      else
        DW1[n,w] = 0
      endif
    next
  next
  for n=0 to nn-1        // each neuron in layer 2
    for w=0 to nOut-1    // the weights for each neuron
      A = Reduce*(random(50000)/500000.0)
      if random(100)<Per
        if random(100)<50 then A = -A
        DW2[n,w] = A
      else
        DW2[n,w] = 0
      endif
    next
  next
  // now add the delta weights to the current weights
  // (ForgetChanges subtracts these same amounts to undo the change)
  for n=0 to ns-1
    for w=0 to nn-1
      WtL1[n,w] += DW1[n,w]
    next
  next
  for n=0 to nn-1
    for w=0 to nOut-1
      WtL2[n,w] += DW2[n,w]
    next
  next
return

ReenforceWts:
  // reward the weights that just produced pleasure
  // only the "relevant" connections (those carrying an active signal)
  // are increased, each by the Enforce factor (1.001 = 1/10 of 1%)
  for n=0 to ns-1
    for w=0 to nn-1
      if In[n] then WtL1[n,w] *= Enforce
    next
  next
  for n=0 to nn-1
    for w=0 to nOut-1
      if H[n] then WtL2[n,w] *= Enforce
    next
  next
return

CheckGood:
  // decide if the robot's last action produced pleasure
  Good = False
  if LearnLineFollow
    // any line sensor seeing the line is pleasure
    if rSense() then Good = True
  else
    // for wall hugging, pleasure is staying a set distance from the wall
    D = rRange(-90)
    if (D>=30) and (D<=45) then Good = True
  endif
return

/*
  // alternate version of CheckGood based on sensor-state transitions
  NewSensorState=rSense()   // see if needs were directly satisfied
  if Out[0] and Out[1] then return   // jittery movement should not be good
  if (OldSensorState = 1) and (NewSensorState = 2) then Good = True
  if (OldSensorState = 4) and (NewSensorState = 6) then Good = True
  if (OldSensorState = 4) and (NewSensorState = 2) then Good = True
  if (OldSensorState = 1) and (NewSensorState = 3) then Good = True
  if (OldSensorState = 3) and (NewSensorState = 2) then Good = True
  if (OldSensorState = 6) and (NewSensorState = 2) then Good = True
  if (OldSensorState = 6) and (NewSensorState = 3) then Good = True
  if (OldSensorState = 3) and (NewSensorState = 6) then Good = True
  if (OldSensorState = 2) and (NewSensorState = 2) then Good = True
  if (OldSensorState = 2) and (NewSensorState = 3) then Good = True
  if (OldSensorState = 2) and (NewSensorState = 6) then Good = True
  if (OldSensorState = 6) and (NewSensorState = 6) then Good = True
  if (OldSensorState = 3) and (NewSensorState = 3) then Good = True
  if (OldSensorState = 7) and (NewSensorState = 3) then Good = True
  if (OldSensorState = 7) and (NewSensorState = 6) then Good = True
  if (OldSensorState = NewSensorState) and rSense() then Good = True
  if (OldSensorState=2) and (NewSensorState = 0) then Good = True   // prevents end of line from being seen as a bad state
  return

CheckGoodList:
  // if current state is on good list, set Good to TRUE
  if not Action[0] then return   // nothing on the list
  v = rSense()
  for j=1 to Action[0]
    if Action[j] = v then Good=True\exit
  next
  MyCount = Action[0]
  if Good then gosub AddToGoodList
  xyString 10,570,Action[0]
  return

AddToGoodList:
  // first check to see if state is already on the list
  IsOn = False
  v = rSense()
  if not Action[0]
    for j=1 to Action[0]
      if Action[j] = v then IsOn=True\exit
    next
  endif
  // and add it, if needed
  if not IsOn
    Action[0]++
    Action[Action[0]]=v
  endif
  return
*/

ForgetChanges:
  // subtract delta weights from all connections
  // this is performed when the new weights do not produce pleasure
  for n=0 to ns-1        // each neuron in layer 1
    for w=0 to nn-1      // the weights for each neuron
      WtL1[n,w] -= DW1[n,w]
    next
  next
  for n=0 to nn-1        // each neuron in layer 2
    for w=0 to nOut-1    // the weights for each neuron
      WtL2[n,w] -= DW2[n,w]
    next
  next
return

SetInputs:
  if LearnLineFollow
    OldSensorState=rSense()   //+(rFeel()<<3)
    // First make all inputs zero
    for i=0 to ns-1
      In[i]=0
    next
    // now set the 3 inputs based on the line sensors
    if rSense()&1 then In[0]=1
    if rSense()&2 then In[1]=1
    if rSense()&4 then In[2]=1
  else
    // used for wall hugging
    for i=0 to ns-1
      In[i]=0
    next
    D=rRange(-90)
    if D<30 then In[0]=1
    if D>=30 then In[1]=1
  endif
return
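// ---------------------------------------------------------------
// Sensor encoding used by SetInputs, for reference:
//   line mode:  rSense() returns a 3-bit value, one bit per line
//               sensor (the value 2 is the middle sensor), and each
//               bit drives one input neuron
//   wall mode:  a single range reading taken at -90 degrees is split
//               into two inputs - D<30 sets In[0], D>=30 sets In[1]
// ---------------------------------------------------------------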
MaxMin:
  // This routine calculates and displays the max and min values
  // of the weights.
  m=0
  M=0
  for n=0 to ns-1        // each neuron in layer 1
    for w=0 to nn-1      // the weights for each neuron
      if m>WtL1[n,w] then m=WtL1[n,w]
      if M<WtL1[n,w] then M=WtL1[n,w]
    next
  next
  for n=0 to nn-1        // each neuron in layer 2
    for w=0 to nOut-1    // the weights for each neuron
      if m>WtL2[n,w] then m=WtL2[n,w]
      if M<WtL2[n,w] then M=WtL2[n,w]
    next
  next
  xyString 10,10,"Max = ",M,"   Min = ",m,"   "
  if M>1 or m<-1
    // automatically reduce weights if they are getting too large
    for n=0 to ns-1        // each neuron in layer 1
      for w=0 to nn-1      // the weights for each neuron
        WtL1[n,w] *= .5
      next
    next
    for n=0 to nn-1        // each neuron in layer 2
      for w=0 to nOut-1    // the weights for each neuron
        WtL2[n,w] *= .5
      next
    next
  endif
return
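// ---------------------------------------------------------------
// Note on the halving above: because every neuron thresholds its
// sum at zero, scaling all of the weights by 0.5 never changes any
// output (only the sign of each weighted sum matters).  The
// reduction simply keeps the weight values in a comfortable numeric
// range as Enforce repeatedly multiplies them upward.
// ---------------------------------------------------------------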