main:
  // AbortMethod(am_Error)
  scrsetmetrics ,,,,0,1
  //------------------------------------------------------------------
  // Change the following items to establish genetic traits
  // Reduce controls the maximum amount used for the delta weights
  //   Normally, this should not have to be altered
  // MaxTries controls the number of times the robot will try to learn
  //   when past memories do not produce some form of pleasure
  // Enforce controls how much (if any) reinforcement is given to past
  //   memories that work to produce pleasure
  // Per is the maximum % of the weights that will be changed during
  //   a learning activity
  Reduce = 1
  MaxTries = 5     // try up to 5 random actions for each sensory situation
  Enforce = 1.001  // increase relevant weights by 1/10 of 1%
  Per = 15
  // the following items control the physical properties of the network
  ns = 3           // number of sensor inputs
  nn = 40          // number of neurons in hidden layer
  nOut = 2         // number of outputs
  LearnLineFollow = True   // if False, the robot will follow walls
  //------------------------------------------------------------------
  gosub Init
  while True
    Good = False
    // is the robot learning lines or walls?
    if LearnLineFollow
      LS = rSense()
    else
      if rRange(-90)<47
        LS = True
      else
        LS = False
      endif
    endif
    if LS   // only try to learn if there is sensory input
      NumMoves++
      if NumMoves=50
        // after 50 moves, calculate the % of movements that were controlled by memory
        React = (100*NumReacts)/NumMoves
        xyString 400,10,"% Reacting = ",React,"  "
        NumMoves=0
        NumReacts=0
        gosub MaxMin   // display current Max/Min
      endif
      gosub ForwardProp
      gosub PerformActions
      gosub CheckGood
      if Good
        NumReacts++
        gosub ReenforceWts
      else
        // current knowledge is not working - so try to learn
        gosub ReverseActions   // prepare to try again
        for try=1 to MaxTries  // try up to MaxTries random operations
          // the more that is learned, the less it tries to learn
          NumTries++
          if NumTries=50
            // after 50 attempts to learn, calculate the % that were successful
            xyString 650,10,"% Good = ",(100*NumGood)/NumTries,"  "
            NumGood=0
            NumTries=0
            gosub MaxMin   // display current Max/Min
          endif
          gosub CreateDeltaWeights   // adds to current weights
          gosub ForwardProp
          gosub PerformActions
          gosub CheckGood
          if not Good
            gosub ForgetChanges
            gosub ReverseActions
          else
            NumGood++
            gosub ReenforceWts
            break   // keeps random changes that worked plus some
          endif
        next
      endif
    endif
    // nothing has worked so let genetic curiosity take over
    if not Good
      R = Random(100)
      if R>90 then rTurn -1*(Random(15)+5)
      if R<10 then rTurn Random(15)+5
      if not (rBumper()&4) then rForward 1
      gosub AvoidPain
    endif
  wend
end

Init:
  gosub Introduction
  ClearScr
  Good = False
  NumTries = 0    // number of times the robot tries to learn something new
  NumGood = 0     // number of times attempting to learn was successful
  NumMoves = 0    // total number of times the robot moves
  NumReacts = 0   // how many of the robot's movements were based on memories
  if not LearnLineFollow then ns = 2   // only two sensors for wall hugging
  // the following arrays hold values relating to the neural network:
  // wt 1st level, wt 2nd level, delta wts 1, delta wts 2, Inputs, Hidden Out, Outputs, Good states
  dim WtL1[ns,nn], WtL2[nn,nOut], DW1[ns,nn], DW2[nn,nOut], In[ns], H[nn], Out[nOut], GoodStates[50]
  dim Action[50]   // "good" action that triggered sensor state to be saved
  Action[0]=1      // first element keeps track of number of active elements in array
  Action[1]=2      // middle sensor ON
  LineWidth 3
  Rectangle 3,3,790,35
  Rectangle 3,35,790,588
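  // ---------------------------------------------------------------
  // Summary of what the network computes (see ForwardProp below):
  // each hidden neuron w sums its weighted sensor inputs and fires
  // only when the sum is positive, i.e.
  //    H[w] = 1  if  In[0]*WtL1[0,w] + ... + In[ns-1]*WtL1[ns-1,w] > 0
  // the two outputs are formed the same way from the hidden layer and
  // map directly to actions (Out[0] fires rTurn -1, Out[1] fires rTurn 1)
  // ---------------------------------------------------------------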
  // set all initial weights to zero
  for n=0 to ns-1        // each neuron in layer 1
    for w=0 to nn-1      // the weights for each neuron
      WtL1[n,w] = 0
      DW1[n,w] = 0
    next
  next
  for n=0 to nn-1        // each neuron in layer 2
    for w=0 to nOut-1    // the weights for each neuron
      WtL2[n,w] = 0
      DW2[n,w] = 0
    next
  next
  // create robot
  rLocate 460,340
  // and the environment
  rInvisible GREEN
  LineWidth 5
  SetColor GREEN
  line 150,150,200,160
  lineto 300,160
  lineto 400,150
  lineto 500,150
  lineto 600,140
  lineto 670,150
  line 250,310,310,330
  lineto 350,380
  lineto 400,390
  lineto 450,390
  lineto 500,400
  lineto 550,390
  line 420,300,500,230
  lineto 600,200
  lineto 700,160
  line 100,350,150,300
  lineto 250,250
  lineto 350,240
  line 65,100,90,150
  lineto 170,230
  line 300,490,400,500
  lineto 500,490
  lineto 600,500
  lineto 700,490
  line 65,160,70,250
  lineto 60,350
  lineto 70,450
  line 100,100,200,90
  lineto 300,100
  lineto 400,90
  lineto 500,100
  lineto 550,90
  lineto 650,110
  line 100,540,200,530
  lineto 300,540
  lineto 400,530
  lineto 500,540
  lineto 550,530
  lineto 650,540
  line 720,90,730,170
  lineto 720,250
  lineto 730,290
  lineto 730,350
  lineto 735,400
  lineto 720,460
  line 350,420,400,450
  lineto 450,450
  lineto 500,460
  Circle 530,250,680,450,BLACK,RED
  Circle 100,350,300,480,BLACK,RED
  Circle 300,200,410,280,BLACK,RED
  //Circle 570,50,700,110,BLACK,RED
return
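// ---------------------------------------------------------------
// A worked example of the genetic traits (values set in main):
// the network has ns*nn + nn*nOut = 3*40 + 40*2 = 200 weighted
// connections.  With Per = 15, each learning attempt perturbs
// roughly 15% of them (about 30 connections), each by a random
// amount of at most Reduce*(49999/500000), or about 0.1.  Each
// success multiplies the relevant weights by Enforce (1.001).
// ---------------------------------------------------------------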
Introduction:
  Print " A ROBOT THAT LEARNS ON ITS OWN"
  Print " by John Blankenship"
  Print " email: RobotBASIC@yahoo.com   webpage: RobotBASIC.org"
  Print
  Print "This program creates a robot whose primary movements are controlled by a neural network."
  Print "The robot will move around the screen and learn to follow the lines. Think of the lines"
  Print "as a food source. It gets pleasure from staying on the lines (eating)."
  Print
  Print "Generally the robot is always learning new things - which means it will also be"
  Print "forgetting some things. When this happens the robot will relearn things - given enough"
  Print "time, the robot might learn everything needed to follow the lines. Usually though, the"
  Print "robot will appear to get bored or lose concentration - but only momentarily as it"
  Print "learns things it has forgotten."
  Print
  Print "There are three things displayed at the top of the screen. On the left side are the Max"
  Print "and Min values for the weights controlling the neural network. In the center is the"
  Print "percentage of the time the robot is REACTING to internal memories. If the value is 90,"
  Print "then the robot is learning 10% of the time. A value of 100 does NOT mean the robot"
  Print "is perfect. It only means that of the last 50 moves (the displays are updated every 50"
  Print "moves) the robot has reacted to internal memories every time (perhaps because it has"
  Print "not encountered something it has not learned how to handle)."
  Print
  Print "The last item displayed (on the right side) is the percentage of time that the robot is"
  Print "successful in learning something new. It learns by simply randomizing the weights in the"
  Print "neural network. If this randomization produces a movement (turn left or right) that"
  Print "causes pleasure (following the line) then those weights are kept - otherwise they are"
  Print "discarded. You can alter the criteria used to determine if actions produce pleasure."
  Print
  Print "You can also alter many other factors such as the size of the neural network or the"
  Print "maximum percentage of weights that are randomly changed. The program is well commented"
  Print "to help you experiment with your own modifications. The details of how this program"
  Print "works were provided in an article in Servo Magazine."
  Print
  Input " PRESS ENTER TO CONTINUE",A
  ClearScr
  Print "In order to show that neural networks can be used for various types of learning,"
  Print "you can tell this robot if you want it to learn to follow lines or to hug walls."
  Print
  Print "Depending on the choice, the program is modified in two ways. First, the inputs"
  Print "to the neural network are different (the inputs either tell the robot where it is"
  Print "on the line, or where it is in relationship to the wall)."
  Print
  Print "The second change has to do with what the robot sees as positive feedback (pleasure)."
  Print "The robot 'feels' pleasure if any of the line sensors sees the line, or if the robot"
  Print "stays within a specified range from a wall (exterior walls are excluded)."
  Print
  Print "There is nothing in the program that tells the robot HOW to follow a line or hug a"
  Print "wall. It learns by trying random weights and keeping those that work and rejecting"
  Print "those that don't. Just like a newborn animal, the robot tries things and eventually"
  Print "learns what gives it pleasure."
  Print
  Print "Press L to make the robot learn to follow lines or W to make the robot learn to"
  Print "hug walls."
  Print
  Print " PRESS L or W TO CONTINUE"
  repeat
    GetKey k
  until k=0
  repeat
    GetKey k
    k=Char(k)
    k=Upper(k)
  until k="L" or k="W"
  if k="W" then LearnLineFollow = False
return

ForwardProp:
  // This routine calculates the outputs from the neural net
  // based on the current sensory inputs
  gosub SetInputs   // sets the values of the input array
  //====== calculate first layer (hidden layer) ========
  // zero out current values of hidden layer
  for i=0 to nn-1
    H[i]=0
  next
  // Sum up all the weighted inputs
  for i=0 to ns-1       // go through inputs
    for w=0 to nn-1     // weights
      H[w] += In[i]*WtL1[i,w]
    next
  next
  // now apply threshold
  for i=0 to nn-1
    if H[i]>0
      H[i]=1
    else
      H[i]=0
    endif
  next
  //====== calculate output values ==========
  // zero out current values
  for i=0 to nOut-1
    Out[i]=0
  next
  // Sum up all the weighted inputs
  for h=0 to nn-1       // go through hidden neurons
    for w=0 to nOut-1   // weights
      Out[w] += H[h]*WtL2[h,w]
    next
  next
  // now apply threshold
  for h=0 to nOut-1
    if Out[h]>0
      Out[h]=1
    else
      Out[h]=0
    endif
  next
  if Out[0] and Out[1]
    // cannot turn both left and right
    Out[0]=0
    Out[1]=0
  endif
return

PerformActions:
  // this routine performs the actions dictated by the output from the neural net
  LS=rSense()
  OldSensorState = LS   // save current sensory state
  if Out[0] then rTurn -1
  if Out[1] then rTurn 1
  if not (rBumper()&4) then rForward 1
  gosub AvoidPain
return

ReverseActions:
  // make the robot attempt to return to its original position
  // this is typically performed after a random try does not produce pleasure
  // this action is not really necessary, but allowing the robot to try
  // a number of random actions for the same situation greatly speeds up the
  // learning process
  LS = rSense()
  rForward -1   // back up
  if Out[0]
    // turn right till sensors change
    //while (rSense()=LS)
      rTurn 1
    //wend
  endif
  if Out[1]
    // turn left till sensors change
    //while (rSense()=LS)
      rTurn -1
    //wend
  endif
return
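// ---------------------------------------------------------------
// The learning cycle driven by the main loop, in brief:
//   1. ForwardProp turns the current sensor state into an action
//   2. PerformActions executes it and CheckGood tests for pleasure
//   3. on failure, CreateDeltaWeights nudges a random subset of the
//      weights, the action is retried, and ForgetChanges undoes the
//      nudge whenever it did not help
// Keeping only the changes that produced pleasure is what slowly
// shapes the weights toward line following (or wall hugging).
// ---------------------------------------------------------------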
AvoidPain:
  // This is a low-level genetic response that prevents the robot from
  // causing errors from colliding with objects. It is similar to an
  // animal's automatic response to pain (such as pulling your hand away
  // from a fire or needle).
  BS = rBumper()
  if BS   // pain
    if random(10)<5
      while rBumper()!=1
        rTurn -1
      wend
    else
      while rBumper()!=1
        rTurn 1
      wend
    endif
  endif
  rGPS xx,yy
  if xx<45 or xx>755 or yy<80 or yy>553
    rTurn 40
    for ii=1 to 40
      if not rBumper() then rForward 1
    next
  endif
  while rFeel()&12
    rTurn 1
  wend
return

CreateDeltaWeights:
  // this routine changes some percentage of the nnet weights by a tiny amount
  // first create a set of delta weights in the DW1 and DW2 arrays
  for n=0 to ns-1        // each neuron in layer 1
    for w=0 to nn-1      // the weights for each neuron
      A = Reduce*(random(50000)/500000.0)   // amount to change each weight
      if random(100)<Per          // only change Per% of the weights
        if random(100)<50 then A = -A       // half the changes are negative
        DW1[n,w] = A
      else
        DW1[n,w] = 0
      endif
    next
  next
  for n=0 to nn-1        // each neuron in layer 2
    for w=0 to nOut-1    // the weights for each neuron
      A = Reduce*(random(50000)/500000.0)
      if random(100)<Per
        if random(100)<50 then A = -A
        DW2[n,w] = A
      else
        DW2[n,w] = 0
      endif
    next
  next
  // now add the delta weights to the current weights
  // (ForgetChanges subtracts these same amounts to undo the change)
  for n=0 to ns-1
    for w=0 to nn-1
      WtL1[n,w] += DW1[n,w]
    next
  next
  for n=0 to nn-1
    for w=0 to nOut-1
      WtL2[n,w] += DW2[n,w]
    next
  next
return

ReenforceWts:
  // reward the weights that just produced pleasure
  // only the "relevant" connections (those carrying an active signal)
  // are increased, each by the Enforce factor (1.001 = 1/10 of 1%)
  for n=0 to ns-1
    for w=0 to nn-1
      if In[n] then WtL1[n,w] *= Enforce
    next
  next
  for n=0 to nn-1
    for w=0 to nOut-1
      if H[n] then WtL2[n,w] *= Enforce
    next
  next
return

CheckGood:
  // decide if the robot's last action produced pleasure
  Good = False
  if LearnLineFollow
    // any line sensor seeing the line is pleasure
    if rSense() then Good = True
  else
    // for wall hugging, pleasure is staying a set distance from the wall
    D = rRange(-90)
    if (D>=30) and (D<=45) then Good = True
  endif
return

/*
  // alternate version of CheckGood based on sensor-state transitions
  NewSensorState=rSense()   // see if needs were directly satisfied
  if Out[0] and Out[1] then return   // jittery movement should not be good
  if (OldSensorState = 1) and (NewSensorState = 2) then Good = True
  if (OldSensorState = 4) and (NewSensorState = 6) then Good = True
  if (OldSensorState = 4) and (NewSensorState = 2) then Good = True
  if (OldSensorState = 1) and (NewSensorState = 3) then Good = True
  if (OldSensorState = 3) and (NewSensorState = 2) then Good = True
  if (OldSensorState = 6) and (NewSensorState = 2) then Good = True
  if (OldSensorState = 6) and (NewSensorState = 3) then Good = True
  if (OldSensorState = 3) and (NewSensorState = 6) then Good = True
  if (OldSensorState = 2) and (NewSensorState = 2) then Good = True
  if (OldSensorState = 2) and (NewSensorState = 3) then Good = True
  if (OldSensorState = 2) and (NewSensorState = 6) then Good = True
  if (OldSensorState = 6) and (NewSensorState = 6) then Good = True
  if (OldSensorState = 3) and (NewSensorState = 3) then Good = True
  if (OldSensorState = 7) and (NewSensorState = 3) then Good = True
  if (OldSensorState = 7) and (NewSensorState = 6) then Good = True
  if (OldSensorState = NewSensorState) and rSense() then Good = True
  if (OldSensorState=2) and (NewSensorState = 0) then Good = True   // prevents end of line from being seen as a bad state
  return

CheckGoodList:
  // if current state is on good list, set Good to TRUE
  if not Action[0] then return   // nothing on the list
  v = rSense()
  for j=1 to Action[0]
    if Action[j] = v then Good=True\exit
  next
  MyCount = Action[0]
  if Good then gosub AddToGoodList
  xyString 10,570,Action[0]
  return

AddToGoodList:
  // first check to see if state is already on the list
  IsOn = False
  v = rSense()
  if not Action[0]
    for j=1 to Action[0]
      if Action[j] = v then IsOn=True\exit
    next
  endif
  // and add it, if needed
  if not IsOn
    Action[0]++
    Action[Action[0]]=v
  endif
  return
*/

ForgetChanges:
  // subtract delta weights from all connections
  // this is performed when the new weights do not produce pleasure
  for n=0 to ns-1        // each neuron in layer 1
    for w=0 to nn-1      // the weights for each neuron
      WtL1[n,w] -= DW1[n,w]
    next
  next
  for n=0 to nn-1        // each neuron in layer 2
    for w=0 to nOut-1    // the weights for each neuron
      WtL2[n,w] -= DW2[n,w]
    next
  next
return

SetInputs:
  if LearnLineFollow
    OldSensorState=rSense()   //+(rFeel()<<3)
    // First make all inputs zero
    for i=0 to ns-1
      In[i]=0
    next
    // now set the 3 inputs based on the line sensors
    if rSense()&1 then In[0]=1
    if rSense()&2 then In[1]=1
    if rSense()&4 then In[2]=1
  else
    // used for wall hugging
    for i=0 to ns-1
      In[i]=0
    next
    D=rRange(-90)
    if D<30 then In[0]=1
    if D>=30 then In[1]=1
  endif
return
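// ---------------------------------------------------------------
// Sensor encoding used by SetInputs, for reference:
//   line mode:  rSense() returns a 3-bit value, one bit per line
//               sensor (the value 2 is the middle sensor), and each
//               bit drives one input neuron
//   wall mode:  a single range reading taken at -90 degrees is split
//               into two inputs - D<30 sets In[0], D>=30 sets In[1]
// ---------------------------------------------------------------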
MaxMin:
  // This routine calculates and displays the max and min values
  // of the weights.
  m=0
  M=0
  for n=0 to ns-1        // each neuron in layer 1
    for w=0 to nn-1      // the weights for each neuron
      if m>WtL1[n,w] then m=WtL1[n,w]
      if M<WtL1[n,w] then M=WtL1[n,w]
    next
  next
  for n=0 to nn-1        // each neuron in layer 2
    for w=0 to nOut-1    // the weights for each neuron
      if m>WtL2[n,w] then m=WtL2[n,w]
      if M<WtL2[n,w] then M=WtL2[n,w]
    next
  next
  xyString 10,10,"Max = ",M,"   Min = ",m,"   "
  if M>1 or m<-1
    // automatically reduce weights if they are getting too large
    for n=0 to ns-1        // each neuron in layer 1
      for w=0 to nn-1      // the weights for each neuron
        WtL1[n,w] *= .5
      next
    next
    for n=0 to nn-1        // each neuron in layer 2
      for w=0 to nOut-1    // the weights for each neuron
        WtL2[n,w] *= .5
      next
    next
  endif
return
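// ---------------------------------------------------------------
// Note on the halving above: because every neuron thresholds its
// sum at zero, scaling all of the weights by 0.5 never changes any
// output (only the sign of each weighted sum matters).  The
// reduction simply keeps the weight values in a comfortable numeric
// range as Enforce repeatedly multiplies them upward.
// ---------------------------------------------------------------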