go to previous page   go to home page   go to next page



Complete Program

Here is the complete program, suitable for copying, pasting, and experimentation:

import java.io.*;

/**
 * Scans a character stream for URLs that begin with {@code http://}
 * (the scheme letters are matched case-insensitively) using a small
 * finite-state machine.
 *
 * <p>Each call to {@link #getURL()} continues reading where the previous
 * call stopped, so repeated calls return successive URLs from the stream.
 */
class URLscan
{
  /** The prefix as it is written into every returned URL. */
  private static final String PREFIX_LOWER = "http://";

  /** Upper-case spelling of the prefix, accepted on input as well. */
  private static final String PREFIX_UPPER = "HTTP://";

  /** Source of the characters being scanned. */
  BufferedReader  in ;

  /**
   * Creates a scanner that reads from the given reader.
   *
   * @param br the reader to scan; it is not closed by this class
   */
  URLscan( BufferedReader br )
  {
    in = br ;
  }

  /**
   * Reads characters until one complete URL has been seen and returns it.
   *
   * <p>A URL is the prefix {@code http://} followed by every character
   * greater than the space character; the first character that is a space
   * or lower (blank, tab, newline, ...) ends the URL and is consumed.
   * The returned string always starts with lower-case {@code http://},
   * whatever the case of the scheme in the input.
   *
   * @return the next URL, or {@code null} when the end of the input is
   *         reached without finding another one
   * @throws IOException if reading from the underlying reader fails
   */
  public String getURL() throws IOException
  {
    final char space = ' ';
    final int inUrl = PREFIX_LOWER.length();  // state: prefix fully matched

    StringBuilder buffer = null;  // buffer for the URL, created once the prefix matches
    int state = 0;                // number of prefix characters matched so far
    int readInt;                  // value returned by read()

    while ( (readInt = in.read()) != -1 )
    {
      char current = (char) readInt;

      if ( state == inUrl )
      {
        if ( current > space )
        {
          buffer.append( current );
        }
        else
        {
          return buffer.toString();
        }
      }
      else if ( current == PREFIX_LOWER.charAt( state )
             || current == PREFIX_UPPER.charAt( state ) )
      {
        state++;
        if ( state == inUrl )
        {
          buffer = new StringBuilder( PREFIX_LOWER );
        }
      }
      else
      {
        // The mismatching character may itself start a new prefix
        // (e.g. the second 'h' of "hhttp://"), so do not discard it blindly.
        state = ( current == 'h' || current == 'H' ) ? 1 : 0;
      }
    }

    // End of input: a URL that runs up to the last character is still a URL.
    return ( buffer != null ) ? buffer.toString() : null;
  }
}


public class URLextract

  public static void main ( String[] args ) throws IOException
    BufferedReader br;
    FileReader     fr;
    String         url;

    if ( args.length != 1 )
      System.out.println("URLextract inputFile");
      System.exit( -1 );

    fr = new  FileReader( args[0] );
    br = new BufferedReader( fr );

    URLscan scan = new URLscan( br );

    while ( (url = scan.getURL()) !=  null )
      System.out.println( url );

    System.out.println("\n\nDone" );




In the "real world" would you implement a URL extractor in the way this program is implemented?