Hyphenation

A simple hyphenation algorithm (syllabicates Spanish words)

Hyphenation



Sometimes we need to display or print a text, and we'd like to hyphenate long words that don't fit at the end of a line, to prevent them from falling entirely into the next line leaving too much space unused.



The main problem that arises is how to divide a word into syllables. Well, I really don't know how to syllabicate in English, so I leave that part to you, but I hope you find the following example of Spanish syllabication useful:



procedure Syllabify(Syllables: TStringList; s: string);
  // Splits the word "s" into syllables using a simplified set of Spanish
  // syllabication rules and stores the pieces, in order, in "Syllables".
  //
  //   Syllables: receives the result; it is cleared first.
  //   s:         the word to split (passed by value; the caller's string
  //              is not modified).
  //
  // A literal '-' inside the word is always stored as an entry of its
  // own, so that callers can recognize it. Empty pieces are never added;
  // for an empty word the list is left empty.
  const
    Consonants = ['b','B','c','C','d','D','f','F','g','G',
            'h','H','j','J','k','K','l','L','m','M','n','N',
            'ñ','Ñ','p','P','q','Q','r','R','s','S','t','T',
            'v','V','w','W','x','X','y','Y','z','Z'];
    // Accented i/u are listed as strong vowels so that they break a
    // vowel pair the same way a/e/o do.
    StrongVowels = ['a','A','á','Á','e','E','é','É',
                    'í','Í','o','ó','O','Ó','ú','Ú'];
    WeakVowels = ['i','I','u','U','ü','Ü'];
    Vowels = StrongVowels + WeakVowels;
  var
    i, j, n, m, hyphen: integer;

  procedure AddPiece(First, Count: integer);
    // Appends Copy(s, First, Count) to the result unless it is empty
    begin
      if Count > 0 then
        Syllables.Add(Copy(s, First, Count));
    end;

  begin
    Syllables.Clear;
    // Surround the word with #0 sentinels so that s[i-1] and s[i+1]
    // are always valid and never match a vowel or a consonant.
    s := #0 + s + #0;
    n := Length(s) - 1; // index of the last real character
    j := 2;             // start of the syllable being built
    i := 2;             // character being examined
    while i <= n do begin
      hyphen := 0; // Do not hyphenate
      if s[i] in Consonants then begin
        if s[i+1] in Vowels then begin
          // vowel + consonant + vowel: break before the consonant
          if s[i-1] in Vowels then hyphen := 1;
        end else if (s[i+1] in Consonants) and
                    (s[i-1] in Vowels) then begin
          // vowel + two consonants: break before the pair when it is
          // one of the groups listed below, otherwise between them
          if s[i+1] in ['r','R'] then begin
            if s[i] in ['b','B','c','C','d','D','f','F','g',
                'G','k','K','p','P','r','R','t','T','v','V']
            then hyphen := 1 else hyphen := 2;
          end else if s[i+1] in ['l','L'] then begin
            if s[i] in ['b','B','c','C','d','D','f','F','g',
                'G','k','K','l','L','p','P','t','T','v','V']
            then hyphen := 1 else hyphen := 2;
          end else if s[i+1] in ['h', 'H'] then begin
            if s[i] in ['c', 'C', 's', 'S', 'p', 'P']
            then hyphen := 1 else hyphen := 2;
          end else
            hyphen := 2;
        end;
      end else if s[i] in StrongVowels then begin
        // two strong vowels in a row belong to different syllables
        if (s[i-1] in StrongVowels) then hyphen := 1
      end else if s[i] = '-' then begin
        // A literal hyphen closes the current piece and becomes an
        // entry of its own. The character right after it starts the
        // next piece and is skipped by the inc(i) at the loop's end.
        AddPiece(j, i - j);
        Syllables.Add('-');
        inc(i);
        j := i;
      end;
      if hyphen = 1 then begin // Hyphenate here
        AddPiece(j, i - j);
        j := i;
      end else if hyphen = 2 then begin // Hyphenate after
        inc(i);
        AddPiece(j, i - j);
        j := i;
      end;
      inc(i);
    end;
    m := Syllables.Count - 1;
    // A single trailing consonant is not a syllable: attach it to the
    // previous piece, unless that piece is a literal '-' entry, which
    // must stay on its own.
    if (j = n) and (m >= 0) and (s[n] in Consonants)
       and (Syllables[m] <> '-') then
      Syllables[m] := Syllables[m] + s[n] // Last letter
    else
      AddPiece(j, n - j + 1); // Last syllable
  end;


To test the procedure you can drop an Edit box and a Label on a form and write the following in the OnChange event of the Edit box:



procedure TForm1.Edit1Change(Sender: TObject);
  // Syllabifies the current text of Edit1 and shows the pieces in
  // Label1, separated by hyphens.
  var
    Parts: TStringList;
    Joined: string;
  begin
    Parts := TStringList.Create;
    try
      Syllabify(Parts, Edit1.Text);
      // Take the list as one string, strip the surrounding whitespace
      // and turn every #13#10 separator into a hyphen
      Joined := Trim(Parts.Text);
      Joined := StringReplace(Joined, #13#10, '-', [rfReplaceAll]);
      Label1.Caption := Joined;
    finally
      Parts.Free;
    end;
  end;


Now that we have a syllabication procedure, we have to note that we can't hyphenate a word at just any syllable break. It is usually correct and/or desirable to join small syllables at the left and/or right ends of a word to guarantee, for example, that there are at least two syllables on either side of the break when the word gets hyphenated, or -as in the following example- to make sure that we have at least four characters on either side:



procedure ApplyRules(Syllables: TStringList; MinLetters: integer = 4);
  // Merges short pieces at both ends of a syllabified word so that the
  // first and the last entry of "Syllables" have at least "MinLetters"
  // characters each (four by default). Any break that remains is then a
  // place where the word may be hyphenated.
  //
  //   Syllables:  the pieces of one word, in order; modified in place.
  //   MinLetters: minimum length of the first and of the last piece.
  //
  // If the whole word is too short, everything collapses into a single
  // entry. An empty list is left untouched.
  var
    last: integer;
  begin
    // Left end: absorb the following piece until the first is long enough
    while (Syllables.Count > 1)
          and (Length(Syllables[0]) < MinLetters) do begin
      Syllables[0] := Syllables[0] + Syllables[1];
      Syllables.Delete(1);
    end;
    // Right end: fold the last piece into the one before it until the
    // last is long enough
    while Syllables.Count > 1 do begin
      last := Syllables.Count - 1;
      if Length(Syllables[last]) >= MinLetters then break;
      Syllables[last-1] := Syllables[last-1] + Syllables[last];
      Syllables.Delete(last);
    end;
  end;


Finally, the time comes to parse the text, splitting each paragraph into lines and determining which words should be hyphenated. The following example does that with a text to be displayed in a Memo:



procedure Hyphenate(Memo: TMemo; OriginalText: TStrings);
  // Fills "Memo" with the paragraphs of "OriginalText", broken into
  // lines that fit the memo's client width. A word that does not fit at
  // the end of a line is split at a syllable break (see Syllabify and
  // ApplyRules) and a '-' is appended to the line.
  //
  //   Memo:         receives the lines; its previous content is cleared.
  //   OriginalText: one paragraph per entry; it is not modified.
  //
  // Every pass of the inner loop removes at least one character from
  // the pending text, so the procedure terminates even when the memo is
  // narrower than a single syllable; in that case the word is cut
  // between characters, without a hyphen.
  var
    paragraph, i, j, k, m, n, MaxLineWidth: integer;
    s, Line: string;
    Bitmap: TBitmap;
    Canvas: TCanvas;
    Syllables: TStringList;
  begin
    Syllables := TStringList.Create;
    try
      // We need a canvas to use its TextWidth method to get the width
      // of the text to see if it fits in the client area or not. The
      // TMemo class doesn't have a Canvas property, so we have to
      // create one of our own.
      Bitmap := TBitmap.Create;
      try
        Canvas := Bitmap.Canvas;
        Canvas.Font := Memo.Font;
        MaxLineWidth := Memo.ClientWidth - 6; // Maximum width
        // A negative limit would make even an empty string "too wide"
        if MaxLineWidth < 0 then MaxLineWidth := 0;
        Memo.Lines.BeginUpdate; // one repaint instead of one per line
        try
          Memo.Lines.Clear;
          for paragraph := 0 to OriginalText.Count - 1 do begin
            // For each paragraph
            s := OriginalText[paragraph]; // Get the original paragraph
            // Get the lines in which we have to break the paragraph.
            // MaxLineWidth >= 0, so s is never empty inside the loop.
            while Canvas.TextWidth(s) > MaxLineWidth do begin
              // First we find (in "j") the index of the start of the
              // first word that doesn't fit (the one to hyphenate) and
              // (in "i") the first character that exceeds the width
              j := 1;
              n := Length(s);
              i := 2;
              while i <= n do begin
                if (s[i-1] = ' ') and (s[i] <> ' ') then
                  j := i; // last beginning of a word
                if Canvas.TextWidth(Copy(s, 1, i)) > MaxLineWidth then
                  break; // reached a width that doesn't fit
                inc(i);
              end;
              // No prefix was found too wide (e.g. a one-character
              // string): keep "i" inside the string
              if i > n then i := n;
              // Where does the break occur?
              if s[i] = ' ' then begin
                // Great! We break on a space
                Memo.Lines.Add(Copy(s, 1, i - 1)); // Add the line
                s := Copy(s, i + 1, n - i); // Remove the line
              end else begin
                // We break somewhere in a word. Now, we find (in "k")
                // the first space after the word
                k := j + 1;
                while (k <= n) and (s[k] <> ' ') do inc(k);
                // Divide the word in Syllables
                Syllabify(Syllables, Copy(s, j, k - j));
                ApplyRules(Syllables);
                // Check (in "m") how many syllables fit
                m := 0;
                Line := Copy(s, 1, j - 1);
                while (m < Syllables.Count) and
                      (Canvas.TextWidth(Line + Syllables[m] + '-')
                       <= MaxLineWidth) do begin
                  Line := Line + Syllables[m];
                  inc(m);
                end;
                if Length(Line) >= j then begin
                  // Part of the word went into the line: the pending
                  // text continues right after what was emitted
                  j := Length(Line) + 1;
                  if j < k then begin
                    // The word was split. Add a hyphen unless the line
                    // already ends with the word's own hyphen
                    if Line[Length(Line)] <> '-' then begin
                      Line := Line + '-';
                      // The added hyphen stands for the word's own one
                      if s[j] = '-' then inc(j);
                    end;
                  end else if (j <= n) and (s[j] = ' ') then
                    inc(j); // whole word emitted: drop the space
                end else if j = 1 then begin
                  // The word starts the line and not even its first
                  // syllable fits: cut between characters, taking as
                  // many as fit but at least one, so that the pending
                  // text always gets shorter
                  i := 1;
                  while (i < n) and
                        (Canvas.TextWidth(Copy(s, 1, i + 1))
                         <= MaxLineWidth) do inc(i);
                  Line := Copy(s, 1, i);
                  j := i + 1;
                end;
                // Otherwise no syllable fits: the line is the text
                // before the word and the word moves to the next line
                Memo.Lines.Add(Line); // Add the line
                s := Copy(s, j, n - j + 1); // Remove the line
              end;
            end;
            Memo.Lines.Add(s); // Add the last line (it fits)
          end;
        finally
          Memo.Lines.EndUpdate;
        end;
      finally
        Bitmap.Free;
      end;
    finally
      Syllables.Free;
    end;
  end;


To test the procedure, drop a Memo component on a form, align it for example to the top of the form (Align = alTop) and write the following code in the Resize event of the form:



procedure TForm1.FormResize(Sender: TObject);
  // Re-flows the sample text into Memo1 each time the form is resized.
  const
    // Sample paragraphs (Spanish, the language Syllabify handles)
    FirstParagraph = 'Si se ha preguntado cómo hacen los '
      + 'programas procesamiento de textos para dividir palabras '
      + 'con de guiones al final de una línea, he aquí un '
      + 'ejemplo sencillo (en comparación con los que usan las '
      + 'aplicaciones de procesamiento de textos).';
    SecondParagraph = 'Este es un segundo párrafo que se provee '
      + 'con fines de ejemplo.';
  var
    Paragraphs: TStringList;
  begin
    Paragraphs := TStringList.Create;
    try
      Paragraphs.Add(FirstParagraph);
      Paragraphs.Add(SecondParagraph);
      // Break the paragraphs into hyphenated lines inside the memo
      Hyphenate(Memo1, Paragraphs);
    finally
      Paragraphs.Free;
    end;
  end;


You can find the full source code of this article in the archive that accompanies the Pascal Newsletter #23

 

Share this article!

Follow us!

Find more helpful articles: