The Methapone Algorithm

A phonetic algorithm such as soundex only optimized for the english language

A description of the metaphone algorithm is available at this page also there is the double metaphone algorithm wich is also implemented on C and with a description at ASpell site



There is already a metaphone implementation in delphi you can find it at SourceForge that version is far better than this






The code is below, I obtained it by translating the Metaphone.cc unit of the htDig search engine, it works in C well but the translation I made aint the better, Why? because I translated mostly using the C approach and not the Delphi one.



I would also like to encourage research on a better (faster less code bloated) tanslation of this algorithm, I am working on one. If you happen to have a better translation post it.



NOTES: This algorithm as well as soundex are english only so no unicode support, or support for ñ, á, é í and miscelaneous characters



function MetaPhone3(const Word:String; KeyLength: Integer = 10): String;



  function Same(x: Char): Boolean;

  begin

    
Result := x in ['F','J','L','M','N','R'];

  end;



  function Vowel(x: Char): Boolean;

  begin

    
Result := x in ['A','E','I','O','U'];

  end;



  function Varson(x: Char): Boolean;

  begin

    
Result := x in ['C','G','P','S','T'];

  end;



  function Noghf(x: Char): Boolean;

  begin

    
Result := x in ['B', 'D', 'H'];

  end;



  function FrontV(x: Char): Boolean;

  begin

    
Result := x in ['E', 'I', 'Y']

  end;



var

  
i: Integer;

  Tmp:String;

begin

  
Tmp := Trim(UpperCase(Word));



  i := 1;

  while (i > 0) do

  begin

    if
(Tmp[i] in ['G', 'K', 'P']) and (Tmp[i+1] = 'N')

      or ((Tmp[i] = 'A') and (Tmp[i+1] = 'E'))

      or ((Tmp[i] = 'W') and (Tmp[i+1] = 'R')) then Delete(Tmp, i, 1);



    if (Tmp[i] = 'W') and (Tmp[i+1] = 'H') then

      
Delete(Tmp, 2, 1);



    if (Tmp[i] = 'X') then Tmp[i] := 'S';



    i := pos(' ', Tmp);

    if (i > 0) then Tmp[i] := #0;

  end;



  i := 0;

  Tmp := Tmp + #0;

  while (Length(Result) < KeyLength) do

  begin

    
inc(i);



    if (Tmp[i] =#0) then Break;



    if (Tmp[i] = Tmp[i-1]) and (Tmp[i] <> 'C') then

      
Continue;



    if Same(Tmp[i]) or (Vowel(Tmp[i]) and (Tmp[i-1] = #0)) then

    begin

      
Result := Result + Tmp[i];

      Continue;

    end;



    case Tmp[i] of

      
'B': if ((i>=2) and (Tmp[i-1] <> 'M')) or (i = 1) then Result := Result + Tmp[i];



      'C':

      begin

        if
FrontV(Tmp[i+1]) and (Tmp[i-1] <> 'S') then

        begin

          
Result := Result + 'S';

          inc(i);


        end else if (Copy(Tmp, i, 2) = 'CH') or (Copy (Tmp, i ,3) = 'CIA') then

        begin

          
Result := Result + 'X';

          if (Copy(Tmp, i, 2) = 'CH') then inc(i);

          if (Copy(Tmp, i, 3) = 'CIA')then inc(i, 2);

        end else Result := Result + 'K';

      end;



      'D': if (Copy(Tmp, i, 2) = 'DG') and FrontV(Tmp[i+3]) then

        begin

          
inc(i,3);

          Result := Result + 'J';

        end else

          
Result := Result + 'T';



      'G': if ((Tmp[i+1] <> 'G') or Vowel(Tmp[i+1])) and

        
((Tmp[i+1]<>'N') or ((Tmp[i+1] = #0) and (Tmp[i+2]<>'E')

         or (Tmp[i+3] <>'D')) and ((Tmp[i+1] <> 'D') or not FrontV(Tmp[i+1]))) then

        begin

         if
(FrontV(Tmp[i+1])) and (Tmp[i+2] <> 'G') then

           
Result := Result + 'J'

         
else

           
Result := Result + 'K';

        end else if (Tmp[i+1] = 'H') and not noghf(Tmp[i -3]) and (Tmp[i -4] <> 'H') then

          
Result := Result + 'F';





      'H': if not Varson(Tmp[i-1]) and (not Vowel(Tmp[i-1]) or Vowel(Tmp[i+1])) then

        
Result := Result + 'H';



      'K': if (Tmp[i-1] <> 'C') then Result := Result + 'K';



      'P': if (Tmp[i+1] = 'H') then

        
Result := Result + 'F'

        
else Result := Result + Tmp[i];



      'Q': Result := Result + 'K';



      'S': if (Tmp[i+1] = 'H') or ((Copy(Tmp, i, 2) = 'SI')

        and (Tmp[i+3] in ['O','A'])) then

        
Result := Result + 'X'

        
else

        
Result := Result + 'S';



      'T': if (Tmp[i+1] = 'I') and (Tmp[i+2] in ['O','A']) then

        
Result := Result + 'X'

        
else if (Tmp[i+1] = 'H') then Result := Result + '0' else

        if
(Tmp[i+1] <> 'C') or (Tmp[i+2] <> 'H') then Result := Result + 'T';



      'V': Result := Result + 'F';



      'W','Y': if Vowel(Tmp[i+1]) then Result := Result + Tmp[i];



      'X': Result := Result + 'KS';



      'Z': Result := Result + 'S';

    end;

  end;

end;



 

Share this article!

Follow us!

Find more helpful articles: