' **************************************************************************** ' PROGRAM: FUNCTION DBLMETAPHONE(STRING x$) ' PURPOSE: double metaphone calculator ' ARGUMENT: a string ' RETURNS: TRUE and updates a global record (DMValue) ' BaseWord$ = The Base String ' Primary$ = Primary metaphone value ' Secondary$ = Secondary value (always exists if Primary$ does) ' HasAlternate = TRUE if Primary$ AND Secondary$ are different ' AUTHOR: vovchik (Puppy Linux forum), based on Jason Filley's VB func. ' DEPENDS: bacon, bash ' PLATFORM: Puppy Linux (actually, any *nix) ' DATE: 25-03-2010 ' VERSION: 0.01a ' NOTES: Remove DECLARATIONS, INIT_VARS AND DM_TEST ' to use DBLMETAPHONE(x$) in your own programs ' **************************************************************************** ' Double Metaphone is the creation of Lawrence Philips. ' http://www.cuj.com/articles/2000/0006/0006d/0006d.htm?topic=articles ' and ' http://en.wikipedia.org/wiki/Double_Metaphone ' **************************************************************************** ' ***************** ' DECLARATIONS ' ***************** OPTION BASE 1 RECORD DMValue LOCAL BaseWord$ TYPE STRING LOCAL Primary$ TYPE STRING LOCAL Secondary$ TYPE STRING LOCAL HasAlternate TYPE NUMBER END RECORD CONST MaxWords = 9 GLOBAL Words$[MaxWords] ' ***************** ' END DECLARATIONS ' ***************** ' ***************** ' SUBS AND FUNCS ' ***************** ' -------- SUB INIT_VARS() ' -------- q$ = CHR$(34) MyVersion$ = "v. 0.1a" Words$[1] = "Erten" Words$[2] = "Arton" Words$[3] = "Airtone" Words$[4] = "Air Tone" Words$[5] = "Air Ten" Words$[6] = "Pascal" Words$[7] = "Ertain" Words$[8] = "Erken" Words$[9] = "Arden" END SUB ' -------- FUNCTION MyMID$(STRING InputString$, NUMBER Start, NUMBER Length) ' -------- LOCAL myresult$, vbNull$ vbNull$ = "" ' Mid throws fits IF the start position is less than 1 IF Start < 1 THEN myresult$ = vbNull$ ELSE myresult$ = MID$(InputString$, Start, Length) END IF RETURN myresult$ END FUNCTION ' -------- FUNCTION ContainsAnyOf(STRING BaseStringToSearch$, STRING SubstringsToSearchFor$) ' -------- ' LOCAL CurrentlySearching$ TYPE STRING LOCAL myresult, i SPLIT SubstringsToSearchFor$ BY "|" TO myarray$ SIZE maxelements FOR i = 1 TO maxelements IF INSTR(BaseStringToSearch$, myarray$[i]) THEN myresult = TRUE BREAK END IF NEXT i RETURN myresult END FUNCTION ' -------- FUNCTION IsSlavoGermanic(STRING InputString$) ' -------- LOCAL myresult ' Should only check this once per string ? IF ContainsAnyOf(InputString$, "W|K|CZ|WITZ") THEN myresult = TRUE END IF RETURN myresult END FUNCTION ' -------- FUNCTION DOUBLEMETAPHONE(STRING InputString$) ' -------- LOCAL iCurrentPosition TYPE NUMBER LOCAL CurrentChar$ TYPE STRING LOCAL Last TYPE NUMBER LOCAL tmpPrimary$ TYPE STRING LOCAL tmpSecondary$ TYPE STRING vbNullString$ = "" iCurrentPosition = 1 Last = LEN(InputString$) tmpPrimary$ = vbNullString$ tmpSecondary$ = vbNullString$ ' Might be faster arranging the CASEs as AND "TNSRHLDCPFMWBGVKQXJZ" ? ' ETAONISRHLDCUPFMWYBGVKQXJZ AND TNSRHLDCPFMWBGVKQXJZ WITH DMValue .BaseWord$ = InputString$ END WITH InputString$ = UCASE$(InputString$) InputString$ = CONCAT$(InputString$, " ") ' skip these when at start of word IF ContainsAnyOf(LEFT$(InputString$, 2), "GN|KN|PN|WR|PS") THEN iCurrentPosition = iCurrentPosition + 1 END IF ' Initial 'X' is pronounced 'Z' e.g. 'Xavier' IF EQUAL(LEFT$(InputString$, 1), "X") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "S") tmpSecondary$ = CONCAT$(tmpSecondary$, "S") iCurrentPosition = iCurrentPosition + 1 END IF REPEAT ' GOTO's are ugly, but it's easy LABEL GetOutOfSelect CurrentChar$ = MID$(InputString$, iCurrentPosition, 1) SELECT CurrentChar$ CASE "A"; CASE "E"; CASE "I"; CASE "O"; CASE "U"; CASE "Y" IF iCurrentPosition EQ 1 THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "A") tmpSecondary$ = CONCAT$(tmpSecondary$, "A") END IF iCurrentPosition = iCurrentPosition + 1 CASE "B" ' "-mb", e.g", "dumb", already skipped over... tmpPrimary$ = CONCAT$(tmpPrimary$, "P") tmpSecondary$ = CONCAT$(tmpSecondary$, "P") IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "B") THEN iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF CASE "Ç" tmpPrimary$ = CONCAT$(tmpPrimary$, "S") tmpSecondary$ = CONCAT$(tmpSecondary$, "S") iCurrentPosition = iCurrentPosition + 1 CASE "C" ' various Germanic IF iCurrentPosition > 2 AND (INSTR("AEIOUY", MyMID$(InputString$, iCurrentPosition - 2, 1)) < 1) AND EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 3), "ACH") AND ((NOT(EQUAL(MID$(InputString$, iCurrentPosition + 2, 1), "I"))) AND (NOT(EQUAL(MID$(InputString$, iCurrentPosition + 2, 1), "E")) OR (EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 6), "BACHER") OR EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 6), "MACHER")))) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF ' special CASE 'caesar' IF iCurrentPosition EQ 1 AND EQUAL(LEFT$(InputString$, 6), "CAESAR") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "S") tmpSecondary$ = CONCAT$(tmpSecondary$, "S") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF ' Italian "chianti" IF EQUAL(MID$(InputString$, iCurrentPosition, 4), "CHIA") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "CH") THEN ' find 'michael' IF iCurrentPosition > 1 AND EQUAL(MID$(InputString$, iCurrentPosition, 4), "CHAE") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "X") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF ' Greek roots, e.g. "chemistry" AND "chorus" IF (iCurrentPosition EQ 1 AND ((EQUAL(MID$(InputString$, iCurrentPosition + 1, 5), "HARAC") OR EQUAL(MID$(InputString$, iCurrentPosition + 1, 5), "HARIS")) OR ContainsAnyOf(MID$(InputString$, iCurrentPosition + 1, 3), "HOR|HYM|HIA|HEM") AND NOT(EQUAL(MID$(InputString$, 1, 5), "CHORE")))) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF ' Germanic, Greek, OR otherwise "ch" FOR "kh" sound IF (EQUAL(LEFT$(InputString$, 4), "VAN ") OR EQUAL(LEFT$(InputString$, 4), "VON ") OR EQUAL(LEFT$(InputString$, 3), "SCH")) OR ContainsAnyOf(MyMID$(InputString$, iCurrentPosition - 2, 6), "ORCHES|ARCHIT|ORCHID") OR INSTR("TS", MyMID$(InputString$, iCurrentPosition + 2, 1)) OR (((INSTR("AOUE", MyMID$(InputString$, iCurrentPosition - 1, 1)) > 0) OR iCurrentPosition EQ 1) AND INSTR("LRNMBHFVW ", MID$(InputString$, iCurrentPosition + 2, 1))) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") ELSE IF iCurrentPosition > 1 THEN IF EQUAL(LEFT$(InputString$, 2), "MC") THEN ' e.g., "McHugh" tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "X") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") END IF ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "X") tmpSecondary$ = CONCAT$(tmpSecondary$, "X") END IF END IF iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF ' e.g, 'czerny' IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "CZ") AND NOT(EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 2), "CZ")) THEN tmpPrimary$ = CONCAT$(tmpPrimary$ ,"S") tmpSecondary$ = CONCAT$(tmpSecondary$, "X") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF ' e.g., 'focaccia' IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 3), "CIA") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "X") tmpSecondary$ = CONCAT$(tmpSecondary$, "X") iCurrentPosition = iCurrentPosition + 3 GOTO GetOutOfSelect END IF ' double 'C', but NOT IF e.g. 'McClellan' IF (EQUAL(MID$(InputString$, iCurrentPosition, 2), "CC")) AND NOT(iCurrentPosition EQ 1 AND EQUAL(LEFT$(InputString$, 1), "M")) THEN ' 'bellocchio' but NOT 'bacchus' IF INSTR("IEH", MID$(InputString$, iCurrentPosition + 2, 1)) AND NOT(EQUAL(MID$(InputString$, iCurrentPosition + 2, 2), "HU")) THEN ' 'accident', 'accede' 'succeed' IF iCurrentPosition EQ 2 AND EQUAL(LEFT$(InputString$, 1), "A") OR (EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 5), "UCCEE") OR EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 5), "UCCES")) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "KS") tmpSecondary$ = CONCAT$(tmpSecondary$, "KS") ' 'bacci', 'bertucci', other italian ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "X") tmpSecondary$ = CONCAT$(tmpSecondary$, "X") END IF iCurrentPosition = iCurrentPosition + 3 GOTO GetOutOfSelect ' Pierce's rule ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF END IF IF ContainsAnyOf(MID$(InputString$, iCurrentPosition, 2), "CK|CG|CQ") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF IF ContainsAnyOf(MID$(InputString$, iCurrentPosition, 2), "CI|CE|CY") THEN ' italian vs. english IF ContainsAnyOf(MID$(InputString$, iCurrentPosition, 3), "CIO|CIE|CIA") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "S") tmpSecondary$ = CONCAT$(tmpSecondary$, "X") ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "S") tmpSecondary$ = CONCAT$(tmpSecondary$, "S") END IF iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") ' name sent in 'mac caffrey', 'mac gregor IF ContainsAnyOf(MID$(InputString$, iCurrentPosition + 1, 2), "| C| Q| G") THEN iCurrentPosition = iCurrentPosition + 3 ELSE IF INSTR("CKQ", MID$(InputString$, iCurrentPosition + 1, 1)) AND (EQUAL(MID$(InputString$, iCurrentPosition + 1, 2), "CE") OR EQUAL(MID$(InputString$, iCurrentPosition + 1, 2), "CI")) THEN iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF END IF CASE "D" IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "DG") THEN IF INSTR("EIY", MID$(InputString$, iCurrentPosition + 2, 1)) THEN ' e.g. 'edge' tmpPrimary$ = CONCAT$(tmpPrimary$, "J") tmpSecondary$ = CONCAT$(tmpSecondary$, "J") iCurrentPosition = iCurrentPosition + 3 GOTO GetOutOfSelect ELSE ' e.g. 'edgar' tmpPrimary$ = CONCAT$(tmpPrimary$, "TK") tmpSecondary$ = CONCAT$(tmpSecondary$, "TK") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF END IF IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "DT") OR EQUAL(MID$(InputString$, iCurrentPosition, 2), "DD") THEN ' MetaphAdd ("T") tmpPrimary$ = CONCAT$(tmpPrimary$, "T") tmpSecondary$ = CONCAT$(tmpSecondary$, "T") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF tmpPrimary$ = CONCAT$(tmpPrimary$, "T") tmpSecondary$ = CONCAT$(tmpSecondary$, "T") iCurrentPosition = iCurrentPosition + 1 CASE "F" IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "F") THEN iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF tmpPrimary$ = CONCAT$(tmpPrimary$, "F") tmpSecondary$ = CONCAT$(tmpSecondary$, "F") CASE "G" IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "H") THEN IF iCurrentPosition > 1 AND INSTR("AEIOUY", MyMID$(InputString$, iCurrentPosition - 1, 1)) < 1 THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF IF iCurrentPosition < 4 THEN ' 'ghislane', ghiradelli IF iCurrentPosition EQ 1 THEN IF EQUAL(MID$(InputString$, iCurrentPosition + 2, 1), "I") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "J") tmpSecondary$ = CONCAT$(tmpSecondary$, "J") ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF END IF END IF ' Parker's rule (with some further refinements) - e.g., 'hugh' IF (iCurrentPosition > 2 AND ContainsAnyOf(MyMID$(InputString$, (iCurrentPosition - 2), 1), "B|H|D") OR (iCurrentPosition > 3 AND ContainsAnyOf(MyMID$(InputString$, (iCurrentPosition - 3), 1), "B|H|D")) OR (iCurrentPosition > 4 AND ContainsAnyOf(MyMID$(InputString$, (iCurrentPosition - 4), 1), "B|H"))) THEN iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect ELSE ' e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough' IF (iCurrentPosition > 3 AND EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 1), "U") AND ContainsAnyOf(MyMID$(InputString$, (iCurrentPosition - 3), 1), "C|G|L|R|T")) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "F") tmpSecondary$ = CONCAT$(tmpSecondary$, "F") ELSE IF (iCurrentPosition > 1 AND NOT(EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 1), "I"))) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") END IF END IF iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF 'END IF END IF IF EQUAL(MyMID$(InputString$, iCurrentPosition + 1, 1), "N") THEN IF iCurrentPosition EQ 2 AND INSTR("AEIOUY", LEFT$(InputString$, 1)) AND (IsSlavoGermanic(InputString$) EQ FALSE) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "KN") tmpSecondary$ = CONCAT$(tmpSecondary$, "N") ELSE ' NOT e.g. 'cagney' IF (NOT(EQUAL(MyMID$(InputString$, (iCurrentPosition + 2), 2), "EY"))) AND NOT(EQUAL(MyMID$(InputString$, iCurrentPosition + 1, 1), "Y")) AND NOT(IsSlavoGermanic(InputString$)) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "N") tmpSecondary$ = CONCAT$(tmpSecondary$, "KN") ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "KN") tmpSecondary$ = CONCAT$(tmpSecondary$, "KN") END IF END IF iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF ' 'tagliaro' IF EQUAL(MyMID$(InputString$, (iCurrentPosition + 1), 2), "LI") AND IsSlavoGermanic(InputString$) EQ FALSE THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "KL") tmpSecondary$ = CONCAT$(tmpSecondary$, "L") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF ' -ges-,-gep-,-gel-, -gie- at beginning IF (iCurrentPosition EQ 1 AND (EQUAL(MyMID$(InputString$, iCurrentPosition + 1, 1), "Y") OR ContainsAnyOf(MID$(InputString$, (iCurrentPosition + 1), 2), "ES|EP|EB|EL|EY|IB|IL|IN|IE|EI|ER"))) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "J") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF ' -ger-, -gy- IF ((EQUAL(MyMID$(InputString$, (iCurrentPosition + 1), 2), "ER")) OR (EQUAL(MyMID$(InputString$, iCurrentPosition + 1, 1), "Y"))) AND (NOT(ContainsAnyOf(MyMID$(InputString$, 1, 6), "DANGER|RANGER|MANGER"))) AND (INSTR("EI", MyMID$(InputString$, iCurrentPosition - 1, 1)) < 1) AND (NOT(ContainsAnyOf(MyMID$(InputString$, (iCurrentPosition - 1), 3), "RGY|OGY"))) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "J") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF ' Italian e.g, 'biaggi' IF INSTR("EIY", (MyMID$(InputString$, (iCurrentPosition + 1), 1))) OR ContainsAnyOf(MyMID$(InputString$, (iCurrentPosition - 1), 4), "AGGI|OGGI") THEN ' obvious Germanic IF ((ContainsAnyOf(MyMID$(InputString$, 1, 4), "VAN |VON ") OR EQUAL(MyMID$(InputString$, 1, 3), "SCH")) OR EQUAL(MyMID$(InputString$, (iCurrentPosition + 1), 2), "ET")) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") END IF ' always soft IF French ending IF EQUAL(MyMID$(InputString$, (iCurrentPosition + 1), 4), "IER ") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "J") tmpSecondary$ = CONCAT$(tmpSecondary$, "J") ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "J") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") END IF iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF IF EQUAL(MyMID$(InputString$, iCurrentPosition + 1, 1), "G") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") iCurrentPosition = iCurrentPosition + 1 GOTO GetOutOfSelect END IF CASE "H" ' only keep IF first & before vowel OR btw. 2 vowels IF iCurrentPosition EQ 1 OR INSTR("AEIOUY", MyMID$(InputString$, iCurrentPosition - 1, 1)) THEN IF INSTR("AEIOUY", MID$(InputString$, iCurrentPosition + 1, 1)) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "H") tmpSecondary$ = CONCAT$(tmpSecondary$, "H") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF ' also takes care of 'HH' END IF iCurrentPosition = iCurrentPosition + 1 CASE "J" ' obvious Spanish, 'jose', 'san jacinto' IF EQUAL(MID$(InputString$, iCurrentPosition, 4), "JOSE") OR EQUAL(LEFT$(InputString$, 4), "SAN ") THEN IF ((iCurrentPosition EQ 1) AND ((EQUAL(MID$(InputString$, iCurrentPosition + 4, 1), " ")) OR (EQUAL(LEFT$(InputString$, 4), "SAN ")))) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "H") tmpSecondary$ = CONCAT$(tmpSecondary$, "H") ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "J") tmpSecondary$ = CONCAT$(tmpSecondary$, "H") END IF iCurrentPosition = iCurrentPosition + 1 GOTO GetOutOfSelect END IF IF (iCurrentPosition EQ 1) AND (NOT(EQUAL(MID$(InputString$, iCurrentPosition, 4), "JOSE"))) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "J") tmpSecondary$ = CONCAT$(tmpSecondary$, "A") ELSE ' Spanish pron. of e.g. 'bajador' IF INSTR("AEIOUY", MyMID$(InputString$, iCurrentPosition - 1, 1)) > 0 AND (IsSlavoGermanic(InputString$) EQ FALSE) AND INSTR("AO", MID$(InputString$, iCurrentPosition + 1, 1)) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "J") tmpSecondary$ = CONCAT$(tmpSecondary$, "H") ELSE IF iCurrentPosition EQ Last THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "J") ELSE IF (INSTR("LTKSNMBZ", MID$(InputString$, iCurrentPosition + 1, 1)) < 1) AND (INSTR("SKL", MyMID$(InputString$, iCurrentPosition - 1, 1)) < 1) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "J") tmpSecondary$ = CONCAT$(tmpSecondary$, "J") END IF END IF END IF END IF IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "J") THEN iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF CASE "K" IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "K") THEN iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") CASE "L" IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "L") THEN ' Spanish e.g. 'cabrillo', 'gallegos' IF (((iCurrentPosition EQ Last - 2) AND ContainsAnyOf(MyMID$(InputString$, iCurrentPosition - 1, 4), "ILLO|ILLA|ALLE")) OR ((ContainsAnyOf(MyMID$(InputString$, Last - 1, 2), "AS|OS") OR INSTR("AO", MID$(InputString$, Last, 1))) AND EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 4), "ALLE"))) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "L") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF tmpPrimary$ = CONCAT$(tmpPrimary$, "L") tmpSecondary$ = CONCAT$(tmpSecondary$, "L") CASE "M" IF (EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 3), "UMB") AND (iCurrentPosition + 1 EQ Last OR EQUAL(MID$(InputString$, iCurrentPosition + 2, 2), "ER"))) OR EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "M") THEN iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF tmpPrimary$ = CONCAT$(tmpPrimary$, "M") tmpSecondary$ = CONCAT$(tmpSecondary$, "M") CASE "N" IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "N") THEN iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF tmpPrimary$ = CONCAT$(tmpPrimary$, "N") tmpSecondary$ = CONCAT$(tmpSecondary$, "N") CASE "Ñ" tmpPrimary$ = CONCAT$(tmpPrimary$, "N") tmpSecondary$ = CONCAT$(tmpSecondary$, "N") iCurrentPosition = iCurrentPosition + 1 GOTO GetOutOfSelect CASE "P" IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "H") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "F") tmpSecondary$ = CONCAT$(tmpSecondary$, "F") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF ' also account for campbell and raspberry IF INSTR("PB", MID$(InputString$, iCurrentPosition + 1, 1)) THEN iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF tmpPrimary$ = CONCAT$(tmpPrimary$, "P") tmpSecondary$ = CONCAT$(tmpSecondary$, "P") CASE "Q" IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "Q") THEN iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF tmpPrimary$ = CONCAT$(tmpPrimary$, "K") tmpSecondary$ = CONCAT$(tmpSecondary$, "K") CASE "R" ' French e.g. 'rogier', but exclude 'hochmeier' IF iCurrentPosition EQ Last AND IsSlavoGermanic(InputString$) EQ FALSE AND EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 2), "IE") AND NOT(EQUAL(MyMID$(InputString$, iCurrentPosition - 4, 2), "ME")) AND NOT(EQUAL(MyMID$(InputString$, iCurrentPosition - 4, 2), "MA")) THEN tmpSecondary$ = CONCAT$(tmpSecondary$, "R") ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "R") tmpSecondary$ = CONCAT$(tmpSecondary$, "R") END IF IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "R") THEN iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF CASE "S" ' special cases 'island', 'isle', 'carlisle', 'carlysle' IF EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 3), "ISL") OR EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 3), "YSL") THEN iCurrentPosition = iCurrentPosition + 1 GOTO GetOutOfSelect END IF ' special case 'sugar-' IF iCurrentPosition EQ 1 AND EQUAL(LEFT$(InputString$, 5), "SUGAR") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "X") tmpSecondary$ = CONCAT$(tmpSecondary$, "S") iCurrentPosition = iCurrentPosition + 1 GOTO GetOutOfSelect END IF IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "SH") THEN ' Germanic IF ContainsAnyOf(MID$(InputString$, iCurrentPosition + 1, 4), "HEIM|HOEK|HOLM|HOLZ") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "S") tmpSecondary$ = CONCAT$(tmpSecondary$, "S") ELSE ' MetaphAdd ("X") tmpPrimary$ = CONCAT$(tmpPrimary$, "X") tmpSecondary$ = CONCAT$(tmpSecondary$, "X") END IF iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF ' Italian and Armenian IF EQUAL(MID$(InputString$, iCurrentPosition, 3), "SIO") OR EQUAL(MID$(InputString$, iCurrentPosition, 3), "SIA") OR EQUAL(MID$(InputString$, iCurrentPosition, 4), "SIAN") THEN IF IsSlavoGermanic(InputString$) EQ FALSE THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "S") tmpSecondary$ = CONCAT$(tmpSecondary$, "X") ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "S") tmpSecondary$ = CONCAT$(tmpSecondary$, "S") END IF iCurrentPosition = iCurrentPosition + 3 GOTO GetOutOfSelect END IF ' German & Anglicizations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider' ' also, -sz- in Slavic languages, although in Hungarian it is pronounced 's' IF (iCurrentPosition EQ 1) AND INSTR("MNLW", MID$(InputString$, iCurrentPosition + 1, 1)) OR EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "Z") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "S") tmpSecondary$ = CONCAT$(tmpSecondary$, "X") IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "Z") THEN iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF GOTO GetOutOfSelect END IF IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "SC") THEN ' Schlesinger's rule IF EQUAL(MID$(InputString$, iCurrentPosition + 2, 1), "H") THEN ' Dutch origin, e.g. 'school', 'schooner' IF ContainsAnyOf(MID$(InputString$, iCurrentPosition + 3, 2), "OO|ER|EN|UY|ED|EM") THEN ' 'schermerhorn', 'schenker' IF EQUAL(MID$(InputString$, iCurrentPosition + 3, 2), "ER") OR EQUAL(MID$(InputString$, iCurrentPosition + 3, 2), "EN") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "X") tmpSecondary$ = CONCAT$(tmpSecondary$, "SK") ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "SK") tmpSecondary$ = CONCAT$(tmpSecondary$, "SK") END IF iCurrentPosition = iCurrentPosition + 3 GOTO GetOutOfSelect ELSE IF iCurrentPosition EQ 1 AND (INSTR("AEIOUYW", MID$(InputString$, 4, 1)) < 1) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "X") tmpSecondary$ = CONCAT$(tmpSecondary$, "S") ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "X") tmpSecondary$ = CONCAT$(tmpSecondary$, "X") END IF iCurrentPosition = iCurrentPosition + 3 GOTO GetOutOfSelect END IF ELIF INSTR("IEY", MID$(InputString$, iCurrentPosition + 2, 1)) > 0 THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "S") tmpSecondary$ = CONCAT$(tmpSecondary$, "S") iCurrentPosition = iCurrentPosition + 3 GOTO GetOutOfSelect END IF ' otherwise tmpPrimary$ = CONCAT$(tmpPrimary$, "SK") tmpSecondary$ = CONCAT$(tmpSecondary$, "SK") iCurrentPosition = iCurrentPosition + 3 GOTO GetOutOfSelect END IF ' French e.g. 'resnais', 'artois' IF iCurrentPosition EQ Last AND (EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 2), "AI") OR EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 2), "OI")) THEN tmpSecondary$ = CONCAT$(tmpSecondary$, "S") ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "S") tmpSecondary$ = CONCAT$(tmpSecondary$, "S") END IF IF INSTR("SZ", MID$(InputString$, iCurrentPosition + 1, 1)) THEN iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF ' END IF CASE "T" IF EQUAL(MID$(InputString$, iCurrentPosition, 4), "TION") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "X") tmpSecondary$ = CONCAT$(tmpSecondary$, "X") iCurrentPosition = iCurrentPosition + 3 GOTO GetOutOfSelect END IF IF EQUAL(MID$(InputString$, iCurrentPosition, 3), "TIA") OR EQUAL(MID$(InputString$, iCurrentPosition, 3), "TCH") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "X") tmpSecondary$ = CONCAT$(tmpSecondary$, "X") iCurrentPosition = iCurrentPosition + 3 GOTO GetOutOfSelect END IF IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "TH") OR EQUAL(MID$(InputString$, iCurrentPosition, 3), "TTH") THEN ' special case 'thomas', 'thames' or Germanic IF EQUAL(MID$(InputString$, iCurrentPosition + 2, 2), "OM") OR EQUAL(MID$(InputString$, iCurrentPosition + 2, 2), "AM") OR EQUAL(LEFT$(InputString$, 4), "VAN ") OR EQUAL(LEFT$(InputString$, 4), "VON ") OR EQUAL(LEFT$(InputString$, 3), "SCH") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "T") tmpSecondary$ = CONCAT$(tmpSecondary$, "T") ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "0") tmpSecondary$ = CONCAT$(tmpSecondary$, "T") END IF iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF IF INSTR("TD", MID$(InputString$, iCurrentPosition + 1, 1)) THEN iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF tmpPrimary$ = CONCAT$(tmpPrimary$, "T") tmpSecondary$ = CONCAT$(tmpSecondary$, "T") CASE "V" IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "V") THEN iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF tmpPrimary$ = CONCAT$(tmpPrimary$, "F") tmpSecondary$ = CONCAT$(tmpSecondary$, "F") GOTO GetOutOfSelect CASE "W" ' can also be in middle of word IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "WR") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "R") tmpSecondary$ = CONCAT$(tmpSecondary$, "R") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF IF iCurrentPosition EQ 1 AND (INSTR("AEIOUY", MID$(InputString$, iCurrentPosition + 1, 1)) OR EQUAL(MID$(InputString$, iCurrentPosition, 2), "WH")) THEN ' Wasserman should match Vasserman IF INSTR("AEIOUY", MID$(InputString$, iCurrentPosition + 1, 1)) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "A") tmpSecondary$ = CONCAT$(tmpSecondary$, "F") ELSE ' need Uomo to match Womo tmpPrimary$ = CONCAT$(tmpPrimary$, "A") tmpSecondary$ = CONCAT$(tmpSecondary$, "A") END IF END IF ' Arnow should match Arnoff IF (iCurrentPosition EQ Last AND INSTR("AEIOUY", MyMID$(InputString$, iCurrentPosition - 1, 1)) OR ContainsAnyOf(MyMID$(InputString$, iCurrentPosition - 1, 5), "EWSKI|EWSKY|OWSKI|OWSKY") OR EQUAL(LEFT$(InputString$, 3), "SCH")) THEN tmpSecondary$ = CONCAT$(tmpSecondary$, "F") iCurrentPosition = iCurrentPosition + 1 GOTO GetOutOfSelect END IF ' Polish e.g. 'filipowicz' IF EQUAL(MID$(InputString$, iCurrentPosition, 4), "WICZ") OR EQUAL(MID$(InputString$, iCurrentPosition, 4), "WITZ") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "TS") tmpSecondary$ = CONCAT$(tmpSecondary$, "FX") iCurrentPosition = iCurrentPosition + 4 GOTO GetOutOfSelect END IF ' else skip it iCurrentPosition = iCurrentPosition + 1 CASE "X" ' French e.g. breaux IF NOT (iCurrentPosition EQ Last AND (EQUAL(MyMID$(InputString$, iCurrentPosition - 3, 3), "IAU") OR EQUAL(MyMID$(InputString$, iCurrentPosition - 3, 3), "IAU") OR EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 2), "AU") OR EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 2), "OU"))) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "KS") tmpSecondary$ = CONCAT$(tmpSecondary$, "KS") END IF IF INSTR("CX", MID$(InputString$, iCurrentPosition + 1, 1)) THEN iCurrentPosition = iCurrentPosition + 1 END IF iCurrentPosition = iCurrentPosition + 1 CASE "Z" ' Chinese Pinyin e.g. 'zhao' IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "H") THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "J") tmpSecondary$ = CONCAT$(tmpSecondary$, "J") iCurrentPosition = iCurrentPosition + 2 GOTO GetOutOfSelect END IF IF (ContainsAnyOf(MID$(InputString$, iCurrentPosition + 1, 2), "ZO|ZI|ZA") OR (IsSlavoGermanic(InputString$) AND (iCurrentPosition > 1 AND NOT(EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 1), "T"))))) THEN tmpPrimary$ = CONCAT$(tmpPrimary$, "S") tmpSecondary$ = CONCAT$(tmpSecondary$, "TS") ELSE tmpPrimary$ = CONCAT$(tmpPrimary$, "S") tmpSecondary$ = CONCAT$(tmpSecondary$, "S") END IF IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "Z") THEN iCurrentPosition = iCurrentPosition + 2 ELSE iCurrentPosition = iCurrentPosition + 1 END IF DEFAULT iCurrentPosition = iCurrentPosition + 1 END SELECT UNTIL iCurrentPosition > Last WITH DMValue .Primary$ = tmpPrimary$ .Secondary$ = tmpSecondary$ END WITH IF NOT(EQUAL(tmpPrimary$, tmpSecondary$)) THEN WITH DMValue .HasAlternate = TRUE END WITH END IF RETURN TRUE END FUNCTION ' -------- SUB DM_TEST(STRING Word2Find$) ' -------- LOCAL i PRINT DOUBLEMETAPHONE(Word2Find$) MyPrimary$ = DMValue.Primary$ MySecondary$ = DMValue.Secondary$ MyAlternate = DMValue.HasAlternate ' PRINT Word2Find$, " ", MyPrimary$, " ", MySecondary$, " Alt: ", MyAlternate FOR i = 1 TO MaxWords DOUBLEMETAPHONE(Words$[i]) ' PRINT DMValue.BaseWord$," ", DMValue.Primary$," ", DMValue.Secondary$," Alt: ", DMValue.HasAlternate PRINT i, ". ", q$, Word2Find$, q$; IF EQUAL(MyPrimary$, DMValue.Primary$) OR EQUAL(MyPrimary$, DMValue.Secondary$) OR EQUAL(MySecondary$, DMValue.Primary$) OR EQUAL(MySecondary$, DMValue.Secondary$) THEN PRINT " sounds like "; ELSE PRINT " does NOT sound like "; END IF PRINT q$, Words$[i], q$ NEXT i PRINT END SUB ' ***************** ' END SUBS & FUNCS ' ***************** ' ***************** ' MAIN ' ***************** INIT_VARS DM_TEST("Eerten") END ' ***************** ' END MAIN ' *****************