The following code demonstrates how to parse a html file looking for

Begin Tag

End Tag

Raw Text

The following routine demonstrates how to parse a html file.

I welcome feed back to improve the routine, if you have any suggestions/hints please let me know.


Si Carter

---------------- BEGIN CODE BLOCK ------------------------

unit HTMLParse;



    Purpose: Parse a html file to extract tags and plain text.

    Copyright © 2003 - TECT Software Ltd. All Rights Reserved.

    All code remains the property of TECT Software Ltd and may not

    be changed without permission. Use of this code is granted to

    any developer for private, open source or commercial applications.

    No warranty expressed or implied. Use at own risk.


      WEB -

      EMail - [email protected]

    Copyright Notice Must Remain With File.

    Visit for *low cost* developer friendly web hosting.


      FastStrings from


      See for demo usage.



uses Classes, FastStringFuncs, FastStrings;


  TTagType = (ttBeginTag, ttEndTag, ttRawText);

  THTMLParseProc = procedure(const HTMLData: string; TagType: TTagType;

    Parameters: TStrings);

procedure ParseHTML(const HTML: string; ParseProc: THTMLParseProc);


uses SysUtils;


  (* NOTE: download the file below, the following codes are wrong when

 displayed in a browser like this :-) *)

  THTMLReplaceWords: array[0..4] of array[0..1] of string = ((' ', ' '),

    ('&', '&'), ('<', '<'), ('>', '>'), ('"', '"'));

procedure ParseHTML(const HTML: string; ParseProc: THTMLParseProc);

  procedure CallTagProc(IsTag: Boolean; HTMLData: string);


    s: string;

    sl: TStringList;

    I: Integer;


    HTMLData := Trim(HTMLData);

    if Length(HTMLData) > 0 then


      if IsTag then


        if Pos(' ', HTMLData) > 0 then

          s := Trim(Copy(HTMLData, 1, Pos(' ', HTMLData)))


          s := Trim(HTMLData);

        sl := TStringList.Create;


          sl.Text := Trim(Copy(HTMLData, Length(s) + 1, length(HTMLData)));

          sl.Text := Trim(FastReplace(sl.Text, ';', #13));

          sl.Text := Trim(FastReplace(sl.Text, '" ', #13));

          sl.Text := Trim(FastReplace(sl.Text, '"', ''));

          if LeftStr(s, 1) = '/' then

            THTMLParseProc(ParseProc)(uppercase(s), ttEndTag, sl)


            THTMLParseProc(ParseProc)(UpperCase(s), ttBeginTag, sl);




      end else


        for I := 0 to 4 do

          HTMLData := FastReplace(HTMLData, THTMLReplaceWords[I, 0],

            THTMLReplaceWords[I, 1]);

        THTMLParseProc(ParseProc)(HTMLData, ttRawText, nil);





  s: string;

  P: PChar;



  P := PChar(HTML);

  s := '';

  while P^ <> #0 do


    case P^ of



          CallTagProc(False, s);

          s := '';




          CallTagProc(True, s);

          s := '';



      s := s + P^;

    end; //case





---------------- END CODE BLOCK ------------------------


Source File: HTMLParse.pas


All tested using D6.


Share this article!

Follow us!

Find more helpful articles: