Parsing URIs

A fast and easy way to parse URIs, as seen in commercial libraries

NOTE: If you have an Internet library, then you already have URI parsing functions. However, this may serve as an alternative, a look at how the parsing algorithm works, a way to spend an article credit, a way to flame somebody else's coding, etc.

In Indy (Internet Direct), for URI parsing see the TIdURI class in the IdURI unit.

In TurboPower Internet Professional, see the IpParseURL function in the IpMisc unit.

I don't know about ICS, though.

A URI is the way an Internet address presents itself; most protocols follow the URI format. A URI has the following general syntax:

  scheme://host[:port]/path/document[?param1=value1&param2=value2][#bookmark]


Here is the approach I came up with for URI parsing; the code is commented.

function IsNumber(ACharacter: Char): Boolean;


  Result := Pos(ACharacter, '0123456789') > 0;


{Parse params from a URL into a TStrings


 Dest[0] = 'p=britney+spears'

 To access params and values check TStringList.Names and


procedure SplitParams(const Params: String; Dest: TStrings);


  p: Integer;

  Tmp: String;


  if not Assigned(Dest) then Exit;


  Tmp := Params;

  p := Pos('&', Tmp);

  while (p > 0) do


    Dest.Add(Copy(Tmp, 1, p -1));

    Tmp := Copy(Tmp, P+1, Length(Tmp) - p);

    p := Pos('&', Tmp);




{Given an URL check for a query, return the query starting

 position, and the params in Params, this function uses SplitParams}

procedure ProcessQuery(const URL: String; Params: TStrings; var QueryPos: Integer);


  //Anything after the ? are params so send them to splitparams

  QueryPos := Pos('?', URL);

  if QueryPos > 0 then

    SplitParams(Copy(URL, QueryPos+1, Length(URL) -QueryPos), Params);


{Given an URL get the document name

 there are some special cases to this:

  1) if URL is of form there is no document

     but when queried to the server it will return one, so return

     no document

  2) if URL is of form or or

     then document is index.php forget anything else}

function QueryDocument(const URL: String; var DocPos: Integer): String;


  QueryPos, i: Integer;


  Result := '';

  //check for parameters

  ProcessQuery(URL, nil, QueryPos);

  if (QueryPos > 0) then



    i := QueryPos;

    while URL[QueryPos] <> '/' do dec(i);


    Result := Copy(URL, i +1, QueryPos);




    {QueryPos is not found try to get either a docname or

     check if no document at hand}

    i := Length(URL);

    while URL[i] <> '/' do dec(i);


    DocPos := i;

    if (i >= Pos('://', URL)+3) then

      Result := Copy(URL, i, Length(URL))



      //No document or url of the form

      Result := '';




  //check for any bookmarks in the document

  if Pos('#', Result) > 0 then

    Result := Copy(Result, 1, Pos('#', Result) -1);

  if pos('/', Result) >0 then

    Delete(Result, pos('/', Result), 1);


{Query the port number of a url if any


 QueryPort will return 8000 else it will return 0}

function QueryPort(const URL: String): Integer;


  DotIdx, SlashIdx: Integer;

  Tmp, Buffer: String;


  Tmp := URL;

  if Pos('://', Tmp) > 0 then

    Delete(Tmp, Pos('://', Tmp), 3);

  SlashIdx := Pos('/', Tmp) -1;

  if SlashIdx < 0 then

    SlashIdx := Length(Tmp);

  DotIdx := Pos(':', Tmp);

  if DotIdx < 0 then

  // no Port Number so exit gracefully


  Buffer := Copy(Tmp, DotIdx+1, SlashIdx);

  val(Buffer, Result, DotIdx);


{Query a Bookmark in a document


 QueryBookMark will return 'notwelcome'}

function QueryBookmark(const URL: String): String;


  HashPos: Integer;

  QuestionPos: Integer;


  HashPos:= Pos('#', URL);

  if (HashPos <= 0) then Exit;

  QuestionPos:= Pos('?', URL);

  if (QuestionPos <= 0) then

    QuestionPos := Length(URL);

  Result := Copy(URL, HashPos+1, QuestionPos-1);


procedure ParseURI(const URI: String; var Host, Document, BookMark, Port: String; QueryParams: TStrings);


  QueryPos, DocPos: Integer;


  ProcessQuery(URI, QueryParams, QueryPos);

  BookMark := QueryBookmark(URI);

  Port := IntTOStr(QueryPort(URI));

  Document := QueryDocument(URI, DocPos);

  Host := Copy(URI, 1, DocPos -1);


By the way, this algorithm can be extended, expanded, etc., and I am working on it. If you have any suggestions, comments, or criticism, drop a comment.


  * According to some sources, the Pos function is not as fast as it should be, and it cannot be used to search a string in reverse.

  * Yes, I know this isn't the best algorithm for URI parsing; as of this writing I am working on enhancements, code extensions, etc.


