' ****************************************************************************
' PROGRAM: FUNCTION DBLMETAPHONE(STRING x$)
' PURPOSE: double metaphone calculator
' ARGUMENT: a string
' RETURNS: TRUE and updates a global record (DMValue)
' BaseWord$ = The Base String
' Primary$ = Primary metaphone value
' Secondary$ = Secondary value (always exists if Primary$ does)
' HasAlternate = TRUE if Primary$ AND Secondary$ are different
' AUTHOR: vovchik (Puppy Linux forum), based on Jason Filley's VB func.
' DEPENDS: bacon, bash
' PLATFORM: Puppy Linux (actually, any *nix)
' DATE: 25-03-2010
' VERSION: 0.01a
' NOTES: Remove DECLARATIONS, INIT_VARS AND DM_TEST
' to use DBLMETAPHONE(x$) in your own programs
' ****************************************************************************
' Double Metaphone is the creation of Lawrence Philips.
' http://www.cuj.com/articles/2000/0006/0006d/0006d.htm?topic=articles
' and
' http://en.wikipedia.org/wiki/Double_Metaphone
' ****************************************************************************
' *****************
' DECLARATIONS
' *****************
OPTION BASE 1
RECORD DMValue
LOCAL BaseWord$ TYPE STRING
LOCAL Primary$ TYPE STRING
LOCAL Secondary$ TYPE STRING
LOCAL HasAlternate TYPE NUMBER
END RECORD
CONST MaxWords = 9
GLOBAL Words$[MaxWords]
' *****************
' END DECLARATIONS
' *****************
' *****************
' SUBS AND FUNCS
' *****************
' --------
SUB INIT_VARS()
' --------
q$ = CHR$(34)
MyVersion$ = "v. 0.1a"
Words$[1] = "Erten"
Words$[2] = "Arton"
Words$[3] = "Airtone"
Words$[4] = "Air Tone"
Words$[5] = "Air Ten"
Words$[6] = "Pascal"
Words$[7] = "Ertain"
Words$[8] = "Erken"
Words$[9] = "Arden"
END SUB
' --------
FUNCTION MyMID$(STRING InputString$, NUMBER Start, NUMBER Length)
' --------
LOCAL myresult$, vbNull$
vbNull$ = ""
' Mid throws fits IF the start position is less than 1
IF Start < 1 THEN
myresult$ = vbNull$
ELSE
myresult$ = MID$(InputString$, Start, Length)
END IF
RETURN myresult$
END FUNCTION
' --------
FUNCTION ContainsAnyOf(STRING BaseStringToSearch$, STRING SubstringsToSearchFor$)
' --------
' LOCAL CurrentlySearching$ TYPE STRING
LOCAL myresult, i
SPLIT SubstringsToSearchFor$ BY "|" TO myarray$ SIZE maxelements
FOR i = 1 TO maxelements
IF INSTR(BaseStringToSearch$, myarray$[i]) THEN
myresult = TRUE
BREAK
END IF
NEXT i
RETURN myresult
END FUNCTION
' --------
FUNCTION IsSlavoGermanic(STRING InputString$)
' --------
LOCAL myresult
' Should only check this once per string ?
IF ContainsAnyOf(InputString$, "W|K|CZ|WITZ") THEN
myresult = TRUE
END IF
RETURN myresult
END FUNCTION
' --------
FUNCTION DOUBLEMETAPHONE(STRING InputString$)
' --------
LOCAL iCurrentPosition TYPE NUMBER
LOCAL CurrentChar$ TYPE STRING
LOCAL Last TYPE NUMBER
LOCAL tmpPrimary$ TYPE STRING
LOCAL tmpSecondary$ TYPE STRING
vbNullString$ = ""
iCurrentPosition = 1
Last = LEN(InputString$)
tmpPrimary$ = vbNullString$
tmpSecondary$ = vbNullString$
' Might be faster arranging the CASEs as <vowels> AND "TNSRHLDCPFMWBGVKQXJZ" ?
' ETAONISRHLDCUPFMWYBGVKQXJZ AND TNSRHLDCPFMWBGVKQXJZ
WITH DMValue
.BaseWord$ = InputString$
END WITH
InputString$ = UCASE$(InputString$)
InputString$ = CONCAT$(InputString$, " ")
' skip these when at start of word
IF ContainsAnyOf(LEFT$(InputString$, 2), "GN|KN|PN|WR|PS") THEN
iCurrentPosition = iCurrentPosition + 1
END IF
' Initial 'X' is pronounced 'Z' e.g. 'Xavier'
IF EQUAL(LEFT$(InputString$, 1), "X") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "S")
tmpSecondary$ = CONCAT$(tmpSecondary$, "S")
iCurrentPosition = iCurrentPosition + 1
END IF
REPEAT
' GOTO's are ugly, but it's easy
LABEL GetOutOfSelect
CurrentChar$ = MID$(InputString$, iCurrentPosition, 1)
SELECT CurrentChar$
CASE "A";
CASE "E";
CASE "I";
CASE "O";
CASE "U";
CASE "Y"
IF iCurrentPosition EQ 1 THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "A")
tmpSecondary$ = CONCAT$(tmpSecondary$, "A")
END IF
iCurrentPosition = iCurrentPosition + 1
CASE "B"
' "-mb", e.g", "dumb", already skipped over...
tmpPrimary$ = CONCAT$(tmpPrimary$, "P")
tmpSecondary$ = CONCAT$(tmpSecondary$, "P")
IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "B") THEN
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
CASE "Ç"
tmpPrimary$ = CONCAT$(tmpPrimary$, "S")
tmpSecondary$ = CONCAT$(tmpSecondary$, "S")
iCurrentPosition = iCurrentPosition + 1
CASE "C"
' various Germanic
IF iCurrentPosition > 2 AND (INSTR("AEIOUY", MyMID$(InputString$, iCurrentPosition - 2, 1)) < 1) AND EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 3), "ACH") AND ((NOT(EQUAL(MID$(InputString$, iCurrentPosition + 2, 1), "I"))) AND (NOT(EQUAL(MID$(InputString$, iCurrentPosition + 2, 1), "E")) OR (EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 6), "BACHER") OR EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 6), "MACHER")))) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
' special CASE 'caesar'
IF iCurrentPosition EQ 1 AND EQUAL(LEFT$(InputString$, 6), "CAESAR") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "S")
tmpSecondary$ = CONCAT$(tmpSecondary$, "S")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
' Italian "chianti"
IF EQUAL(MID$(InputString$, iCurrentPosition, 4), "CHIA") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "CH") THEN
' find 'michael'
IF iCurrentPosition > 1 AND EQUAL(MID$(InputString$, iCurrentPosition, 4), "CHAE") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "X")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
' Greek roots, e.g. "chemistry" AND "chorus"
IF (iCurrentPosition EQ 1 AND ((EQUAL(MID$(InputString$, iCurrentPosition + 1, 5), "HARAC") OR EQUAL(MID$(InputString$, iCurrentPosition + 1, 5), "HARIS")) OR ContainsAnyOf(MID$(InputString$, iCurrentPosition + 1, 3), "HOR|HYM|HIA|HEM") AND NOT(EQUAL(MID$(InputString$, 1, 5), "CHORE")))) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
' Germanic, Greek, OR otherwise "ch" FOR "kh" sound
IF (EQUAL(LEFT$(InputString$, 4), "VAN ") OR EQUAL(LEFT$(InputString$, 4), "VON ") OR EQUAL(LEFT$(InputString$, 3), "SCH")) OR ContainsAnyOf(MyMID$(InputString$, iCurrentPosition - 2, 6), "ORCHES|ARCHIT|ORCHID") OR INSTR("TS", MyMID$(InputString$, iCurrentPosition + 2, 1)) OR (((INSTR("AOUE", MyMID$(InputString$, iCurrentPosition - 1, 1)) > 0) OR iCurrentPosition EQ 1) AND INSTR("LRNMBHFVW ", MID$(InputString$, iCurrentPosition + 2, 1))) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
ELSE
IF iCurrentPosition > 1 THEN
IF EQUAL(LEFT$(InputString$, 2), "MC") THEN
' e.g., "McHugh"
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "X")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
END IF
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "X")
tmpSecondary$ = CONCAT$(tmpSecondary$, "X")
END IF
END IF
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
' e.g, 'czerny'
IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "CZ") AND NOT(EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 2), "CZ")) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$ ,"S")
tmpSecondary$ = CONCAT$(tmpSecondary$, "X")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
' e.g., 'focaccia'
IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 3), "CIA") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "X")
tmpSecondary$ = CONCAT$(tmpSecondary$, "X")
iCurrentPosition = iCurrentPosition + 3
GOTO GetOutOfSelect
END IF
' double 'C', but NOT IF e.g. 'McClellan'
IF (EQUAL(MID$(InputString$, iCurrentPosition, 2), "CC")) AND NOT(iCurrentPosition EQ 1 AND EQUAL(LEFT$(InputString$, 1), "M")) THEN
' 'bellocchio' but NOT 'bacchus'
IF INSTR("IEH", MID$(InputString$, iCurrentPosition + 2, 1)) AND NOT(EQUAL(MID$(InputString$, iCurrentPosition + 2, 2), "HU")) THEN
' 'accident', 'accede' 'succeed'
IF iCurrentPosition EQ 2 AND EQUAL(LEFT$(InputString$, 1), "A") OR (EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 5), "UCCEE") OR EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 5), "UCCES")) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "KS")
tmpSecondary$ = CONCAT$(tmpSecondary$, "KS")
' 'bacci', 'bertucci', other italian
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "X")
tmpSecondary$ = CONCAT$(tmpSecondary$, "X")
END IF
iCurrentPosition = iCurrentPosition + 3
GOTO GetOutOfSelect
' Pierce's rule
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
END IF
IF ContainsAnyOf(MID$(InputString$, iCurrentPosition, 2), "CK|CG|CQ") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
IF ContainsAnyOf(MID$(InputString$, iCurrentPosition, 2), "CI|CE|CY") THEN
' italian vs. english
IF ContainsAnyOf(MID$(InputString$, iCurrentPosition, 3), "CIO|CIE|CIA") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "S")
tmpSecondary$ = CONCAT$(tmpSecondary$, "X")
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "S")
tmpSecondary$ = CONCAT$(tmpSecondary$, "S")
END IF
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
' name sent in 'mac caffrey', 'mac gregor
IF ContainsAnyOf(MID$(InputString$, iCurrentPosition + 1, 2), "| C| Q| G") THEN
iCurrentPosition = iCurrentPosition + 3
ELSE
IF INSTR("CKQ", MID$(InputString$, iCurrentPosition + 1, 1)) AND (EQUAL(MID$(InputString$, iCurrentPosition + 1, 2), "CE") OR EQUAL(MID$(InputString$, iCurrentPosition + 1, 2), "CI")) THEN
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
END IF
CASE "D"
IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "DG") THEN
IF INSTR("EIY", MID$(InputString$, iCurrentPosition + 2, 1)) THEN
' e.g. 'edge'
tmpPrimary$ = CONCAT$(tmpPrimary$, "J")
tmpSecondary$ = CONCAT$(tmpSecondary$, "J")
iCurrentPosition = iCurrentPosition + 3
GOTO GetOutOfSelect
ELSE
' e.g. 'edgar'
tmpPrimary$ = CONCAT$(tmpPrimary$, "TK")
tmpSecondary$ = CONCAT$(tmpSecondary$, "TK")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
END IF
IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "DT") OR EQUAL(MID$(InputString$, iCurrentPosition, 2), "DD") THEN
' MetaphAdd ("T")
tmpPrimary$ = CONCAT$(tmpPrimary$, "T")
tmpSecondary$ = CONCAT$(tmpSecondary$, "T")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
tmpPrimary$ = CONCAT$(tmpPrimary$, "T")
tmpSecondary$ = CONCAT$(tmpSecondary$, "T")
iCurrentPosition = iCurrentPosition + 1
CASE "F"
IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "F") THEN
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
tmpPrimary$ = CONCAT$(tmpPrimary$, "F")
tmpSecondary$ = CONCAT$(tmpSecondary$, "F")
CASE "G"
IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "H") THEN
IF iCurrentPosition > 1 AND INSTR("AEIOUY", MyMID$(InputString$, iCurrentPosition - 1, 1)) < 1 THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
IF iCurrentPosition < 4 THEN
' 'ghislane', ghiradelli
IF iCurrentPosition EQ 1 THEN
IF EQUAL(MID$(InputString$, iCurrentPosition + 2, 1), "I") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "J")
tmpSecondary$ = CONCAT$(tmpSecondary$, "J")
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
END IF
END IF
' Parker's rule (with some further refinements) - e.g., 'hugh'
IF (iCurrentPosition > 2 AND ContainsAnyOf(MyMID$(InputString$, (iCurrentPosition - 2), 1), "B|H|D") OR (iCurrentPosition > 3 AND ContainsAnyOf(MyMID$(InputString$, (iCurrentPosition - 3), 1), "B|H|D")) OR (iCurrentPosition > 4 AND ContainsAnyOf(MyMID$(InputString$, (iCurrentPosition - 4), 1), "B|H"))) THEN
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
ELSE
' e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
IF (iCurrentPosition > 3 AND EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 1), "U") AND ContainsAnyOf(MyMID$(InputString$, (iCurrentPosition - 3), 1), "C|G|L|R|T")) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "F")
tmpSecondary$ = CONCAT$(tmpSecondary$, "F")
ELSE
IF (iCurrentPosition > 1 AND NOT(EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 1), "I"))) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
END IF
END IF
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
'END IF
END IF
IF EQUAL(MyMID$(InputString$, iCurrentPosition + 1, 1), "N") THEN
IF iCurrentPosition EQ 2 AND INSTR("AEIOUY", LEFT$(InputString$, 1)) AND (IsSlavoGermanic(InputString$) EQ FALSE) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "KN")
tmpSecondary$ = CONCAT$(tmpSecondary$, "N")
ELSE
' NOT e.g. 'cagney'
IF (NOT(EQUAL(MyMID$(InputString$, (iCurrentPosition + 2), 2), "EY"))) AND NOT(EQUAL(MyMID$(InputString$, iCurrentPosition + 1, 1), "Y")) AND NOT(IsSlavoGermanic(InputString$)) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "N")
tmpSecondary$ = CONCAT$(tmpSecondary$, "KN")
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "KN")
tmpSecondary$ = CONCAT$(tmpSecondary$, "KN")
END IF
END IF
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
' 'tagliaro'
IF EQUAL(MyMID$(InputString$, (iCurrentPosition + 1), 2), "LI") AND IsSlavoGermanic(InputString$) EQ FALSE THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "KL")
tmpSecondary$ = CONCAT$(tmpSecondary$, "L")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
' -ges-,-gep-,-gel-, -gie- at beginning
IF (iCurrentPosition EQ 1 AND (EQUAL(MyMID$(InputString$, iCurrentPosition + 1, 1), "Y") OR ContainsAnyOf(MID$(InputString$, (iCurrentPosition + 1), 2), "ES|EP|EB|EL|EY|IB|IL|IN|IE|EI|ER"))) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "J")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
' -ger-, -gy-
IF ((EQUAL(MyMID$(InputString$, (iCurrentPosition + 1), 2), "ER")) OR (EQUAL(MyMID$(InputString$, iCurrentPosition + 1, 1), "Y"))) AND (NOT(ContainsAnyOf(MyMID$(InputString$, 1, 6), "DANGER|RANGER|MANGER"))) AND (INSTR("EI", MyMID$(InputString$, iCurrentPosition - 1, 1)) < 1) AND (NOT(ContainsAnyOf(MyMID$(InputString$, (iCurrentPosition - 1), 3), "RGY|OGY"))) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "J")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
' Italian e.g, 'biaggi'
IF INSTR("EIY", (MyMID$(InputString$, (iCurrentPosition + 1), 1))) OR ContainsAnyOf(MyMID$(InputString$, (iCurrentPosition - 1), 4), "AGGI|OGGI") THEN
' obvious Germanic
IF ((ContainsAnyOf(MyMID$(InputString$, 1, 4), "VAN |VON ") OR EQUAL(MyMID$(InputString$, 1, 3), "SCH")) OR EQUAL(MyMID$(InputString$, (iCurrentPosition + 1), 2), "ET")) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
END IF
' always soft IF French ending
IF EQUAL(MyMID$(InputString$, (iCurrentPosition + 1), 4), "IER ") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "J")
tmpSecondary$ = CONCAT$(tmpSecondary$, "J")
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "J")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
END IF
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
IF EQUAL(MyMID$(InputString$, iCurrentPosition + 1, 1), "G") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
iCurrentPosition = iCurrentPosition + 1
GOTO GetOutOfSelect
END IF
CASE "H"
' only keep IF first & before vowel OR btw. 2 vowels
IF iCurrentPosition EQ 1 OR INSTR("AEIOUY", MyMID$(InputString$, iCurrentPosition - 1, 1)) THEN
IF INSTR("AEIOUY", MID$(InputString$, iCurrentPosition + 1, 1)) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "H")
tmpSecondary$ = CONCAT$(tmpSecondary$, "H")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
' also takes care of 'HH'
END IF
iCurrentPosition = iCurrentPosition + 1
CASE "J"
' obvious Spanish, 'jose', 'san jacinto'
IF EQUAL(MID$(InputString$, iCurrentPosition, 4), "JOSE") OR EQUAL(LEFT$(InputString$, 4), "SAN ") THEN
IF ((iCurrentPosition EQ 1) AND ((EQUAL(MID$(InputString$, iCurrentPosition + 4, 1), " ")) OR (EQUAL(LEFT$(InputString$, 4), "SAN ")))) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "H")
tmpSecondary$ = CONCAT$(tmpSecondary$, "H")
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "J")
tmpSecondary$ = CONCAT$(tmpSecondary$, "H")
END IF
iCurrentPosition = iCurrentPosition + 1
GOTO GetOutOfSelect
END IF
IF (iCurrentPosition EQ 1) AND (NOT(EQUAL(MID$(InputString$, iCurrentPosition, 4), "JOSE"))) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "J")
tmpSecondary$ = CONCAT$(tmpSecondary$, "A")
ELSE
' Spanish pron. of e.g. 'bajador'
IF INSTR("AEIOUY", MyMID$(InputString$, iCurrentPosition - 1, 1)) > 0 AND (IsSlavoGermanic(InputString$) EQ FALSE) AND INSTR("AO", MID$(InputString$, iCurrentPosition + 1, 1)) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "J")
tmpSecondary$ = CONCAT$(tmpSecondary$, "H")
ELSE
IF iCurrentPosition EQ Last THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "J")
ELSE
IF (INSTR("LTKSNMBZ", MID$(InputString$, iCurrentPosition + 1, 1)) < 1) AND (INSTR("SKL", MyMID$(InputString$, iCurrentPosition - 1, 1)) < 1) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "J")
tmpSecondary$ = CONCAT$(tmpSecondary$, "J")
END IF
END IF
END IF
END IF
IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "J") THEN
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
CASE "K"
IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "K") THEN
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
CASE "L"
IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "L") THEN
' Spanish e.g. 'cabrillo', 'gallegos'
IF (((iCurrentPosition EQ Last - 2) AND ContainsAnyOf(MyMID$(InputString$, iCurrentPosition - 1, 4), "ILLO|ILLA|ALLE")) OR ((ContainsAnyOf(MyMID$(InputString$, Last - 1, 2), "AS|OS") OR INSTR("AO", MID$(InputString$, Last, 1))) AND EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 4), "ALLE"))) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "L")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
tmpPrimary$ = CONCAT$(tmpPrimary$, "L")
tmpSecondary$ = CONCAT$(tmpSecondary$, "L")
CASE "M"
IF (EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 3), "UMB") AND (iCurrentPosition + 1 EQ Last OR EQUAL(MID$(InputString$, iCurrentPosition + 2, 2), "ER"))) OR EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "M") THEN
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
tmpPrimary$ = CONCAT$(tmpPrimary$, "M")
tmpSecondary$ = CONCAT$(tmpSecondary$, "M")
CASE "N"
IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "N") THEN
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
tmpPrimary$ = CONCAT$(tmpPrimary$, "N")
tmpSecondary$ = CONCAT$(tmpSecondary$, "N")
CASE "Ñ"
tmpPrimary$ = CONCAT$(tmpPrimary$, "N")
tmpSecondary$ = CONCAT$(tmpSecondary$, "N")
iCurrentPosition = iCurrentPosition + 1
GOTO GetOutOfSelect
CASE "P"
IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "H") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "F")
tmpSecondary$ = CONCAT$(tmpSecondary$, "F")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
' also account for campbell and raspberry
IF INSTR("PB", MID$(InputString$, iCurrentPosition + 1, 1)) THEN
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
tmpPrimary$ = CONCAT$(tmpPrimary$, "P")
tmpSecondary$ = CONCAT$(tmpSecondary$, "P")
CASE "Q"
IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "Q") THEN
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
tmpPrimary$ = CONCAT$(tmpPrimary$, "K")
tmpSecondary$ = CONCAT$(tmpSecondary$, "K")
CASE "R"
' French e.g. 'rogier', but exclude 'hochmeier'
IF iCurrentPosition EQ Last AND IsSlavoGermanic(InputString$) EQ FALSE AND EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 2), "IE") AND NOT(EQUAL(MyMID$(InputString$, iCurrentPosition - 4, 2), "ME")) AND NOT(EQUAL(MyMID$(InputString$, iCurrentPosition - 4, 2), "MA")) THEN
tmpSecondary$ = CONCAT$(tmpSecondary$, "R")
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "R")
tmpSecondary$ = CONCAT$(tmpSecondary$, "R")
END IF
IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "R") THEN
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
CASE "S"
' special cases 'island', 'isle', 'carlisle', 'carlysle'
IF EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 3), "ISL") OR EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 3), "YSL") THEN
iCurrentPosition = iCurrentPosition + 1
GOTO GetOutOfSelect
END IF
' special case 'sugar-'
IF iCurrentPosition EQ 1 AND EQUAL(LEFT$(InputString$, 5), "SUGAR") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "X")
tmpSecondary$ = CONCAT$(tmpSecondary$, "S")
iCurrentPosition = iCurrentPosition + 1
GOTO GetOutOfSelect
END IF
IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "SH") THEN
' Germanic
IF ContainsAnyOf(MID$(InputString$, iCurrentPosition + 1, 4), "HEIM|HOEK|HOLM|HOLZ") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "S")
tmpSecondary$ = CONCAT$(tmpSecondary$, "S")
ELSE
' MetaphAdd ("X")
tmpPrimary$ = CONCAT$(tmpPrimary$, "X")
tmpSecondary$ = CONCAT$(tmpSecondary$, "X")
END IF
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
' Italian and Armenian
IF EQUAL(MID$(InputString$, iCurrentPosition, 3), "SIO") OR EQUAL(MID$(InputString$, iCurrentPosition, 3), "SIA") OR EQUAL(MID$(InputString$, iCurrentPosition, 4), "SIAN") THEN
IF IsSlavoGermanic(InputString$) EQ FALSE THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "S")
tmpSecondary$ = CONCAT$(tmpSecondary$, "X")
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "S")
tmpSecondary$ = CONCAT$(tmpSecondary$, "S")
END IF
iCurrentPosition = iCurrentPosition + 3
GOTO GetOutOfSelect
END IF
' German & Anglicizations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
' also, -sz- in Slavic languages, although in Hungarian it is pronounced 's'
IF (iCurrentPosition EQ 1) AND INSTR("MNLW", MID$(InputString$, iCurrentPosition + 1, 1)) OR EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "Z") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "S")
tmpSecondary$ = CONCAT$(tmpSecondary$, "X")
IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "Z") THEN
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
GOTO GetOutOfSelect
END IF
IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "SC") THEN
' Schlesinger's rule
IF EQUAL(MID$(InputString$, iCurrentPosition + 2, 1), "H") THEN
' Dutch origin, e.g. 'school', 'schooner'
IF ContainsAnyOf(MID$(InputString$, iCurrentPosition + 3, 2), "OO|ER|EN|UY|ED|EM") THEN
' 'schermerhorn', 'schenker'
IF EQUAL(MID$(InputString$, iCurrentPosition + 3, 2), "ER") OR EQUAL(MID$(InputString$, iCurrentPosition + 3, 2), "EN") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "X")
tmpSecondary$ = CONCAT$(tmpSecondary$, "SK")
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "SK")
tmpSecondary$ = CONCAT$(tmpSecondary$, "SK")
END IF
iCurrentPosition = iCurrentPosition + 3
GOTO GetOutOfSelect
ELSE
IF iCurrentPosition EQ 1 AND (INSTR("AEIOUYW", MID$(InputString$, 4, 1)) < 1) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "X")
tmpSecondary$ = CONCAT$(tmpSecondary$, "S")
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "X")
tmpSecondary$ = CONCAT$(tmpSecondary$, "X")
END IF
iCurrentPosition = iCurrentPosition + 3
GOTO GetOutOfSelect
END IF
ELIF INSTR("IEY", MID$(InputString$, iCurrentPosition + 2, 1)) > 0 THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "S")
tmpSecondary$ = CONCAT$(tmpSecondary$, "S")
iCurrentPosition = iCurrentPosition + 3
GOTO GetOutOfSelect
END IF
' otherwise
tmpPrimary$ = CONCAT$(tmpPrimary$, "SK")
tmpSecondary$ = CONCAT$(tmpSecondary$, "SK")
iCurrentPosition = iCurrentPosition + 3
GOTO GetOutOfSelect
END IF
' French e.g. 'resnais', 'artois'
IF iCurrentPosition EQ Last AND (EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 2), "AI") OR EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 2), "OI")) THEN
tmpSecondary$ = CONCAT$(tmpSecondary$, "S")
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "S")
tmpSecondary$ = CONCAT$(tmpSecondary$, "S")
END IF
IF INSTR("SZ", MID$(InputString$, iCurrentPosition + 1, 1)) THEN
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
' END IF
CASE "T"
IF EQUAL(MID$(InputString$, iCurrentPosition, 4), "TION") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "X")
tmpSecondary$ = CONCAT$(tmpSecondary$, "X")
iCurrentPosition = iCurrentPosition + 3
GOTO GetOutOfSelect
END IF
IF EQUAL(MID$(InputString$, iCurrentPosition, 3), "TIA") OR EQUAL(MID$(InputString$, iCurrentPosition, 3), "TCH") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "X")
tmpSecondary$ = CONCAT$(tmpSecondary$, "X")
iCurrentPosition = iCurrentPosition + 3
GOTO GetOutOfSelect
END IF
IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "TH") OR EQUAL(MID$(InputString$, iCurrentPosition, 3), "TTH") THEN
' special case 'thomas', 'thames' or Germanic
IF EQUAL(MID$(InputString$, iCurrentPosition + 2, 2), "OM") OR EQUAL(MID$(InputString$, iCurrentPosition + 2, 2), "AM") OR EQUAL(LEFT$(InputString$, 4), "VAN ") OR EQUAL(LEFT$(InputString$, 4), "VON ") OR EQUAL(LEFT$(InputString$, 3), "SCH") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "T")
tmpSecondary$ = CONCAT$(tmpSecondary$, "T")
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "0")
tmpSecondary$ = CONCAT$(tmpSecondary$, "T")
END IF
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
IF INSTR("TD", MID$(InputString$, iCurrentPosition + 1, 1)) THEN
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
tmpPrimary$ = CONCAT$(tmpPrimary$, "T")
tmpSecondary$ = CONCAT$(tmpSecondary$, "T")
CASE "V"
IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "V") THEN
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
tmpPrimary$ = CONCAT$(tmpPrimary$, "F")
tmpSecondary$ = CONCAT$(tmpSecondary$, "F")
GOTO GetOutOfSelect
CASE "W"
' can also be in middle of word
IF EQUAL(MID$(InputString$, iCurrentPosition, 2), "WR") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "R")
tmpSecondary$ = CONCAT$(tmpSecondary$, "R")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
IF iCurrentPosition EQ 1 AND (INSTR("AEIOUY", MID$(InputString$, iCurrentPosition + 1, 1)) OR EQUAL(MID$(InputString$, iCurrentPosition, 2), "WH")) THEN
' Wasserman should match Vasserman
IF INSTR("AEIOUY", MID$(InputString$, iCurrentPosition + 1, 1)) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "A")
tmpSecondary$ = CONCAT$(tmpSecondary$, "F")
ELSE
' need Uomo to match Womo
tmpPrimary$ = CONCAT$(tmpPrimary$, "A")
tmpSecondary$ = CONCAT$(tmpSecondary$, "A")
END IF
END IF
' Arnow should match Arnoff
IF (iCurrentPosition EQ Last AND INSTR("AEIOUY", MyMID$(InputString$, iCurrentPosition - 1, 1)) OR ContainsAnyOf(MyMID$(InputString$, iCurrentPosition - 1, 5), "EWSKI|EWSKY|OWSKI|OWSKY") OR EQUAL(LEFT$(InputString$, 3), "SCH")) THEN
tmpSecondary$ = CONCAT$(tmpSecondary$, "F")
iCurrentPosition = iCurrentPosition + 1
GOTO GetOutOfSelect
END IF
' Polish e.g. 'filipowicz'
IF EQUAL(MID$(InputString$, iCurrentPosition, 4), "WICZ") OR EQUAL(MID$(InputString$, iCurrentPosition, 4), "WITZ") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "TS")
tmpSecondary$ = CONCAT$(tmpSecondary$, "FX")
iCurrentPosition = iCurrentPosition + 4
GOTO GetOutOfSelect
END IF
' else skip it
iCurrentPosition = iCurrentPosition + 1
CASE "X"
' French e.g. breaux
IF NOT (iCurrentPosition EQ Last AND (EQUAL(MyMID$(InputString$, iCurrentPosition - 3, 3), "IAU") OR EQUAL(MyMID$(InputString$, iCurrentPosition - 3, 3), "IAU") OR EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 2), "AU") OR EQUAL(MyMID$(InputString$, iCurrentPosition - 2, 2), "OU"))) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "KS")
tmpSecondary$ = CONCAT$(tmpSecondary$, "KS")
END IF
IF INSTR("CX", MID$(InputString$, iCurrentPosition + 1, 1)) THEN
iCurrentPosition = iCurrentPosition + 1
END IF
iCurrentPosition = iCurrentPosition + 1
CASE "Z"
' Chinese Pinyin e.g. 'zhao'
IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "H") THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "J")
tmpSecondary$ = CONCAT$(tmpSecondary$, "J")
iCurrentPosition = iCurrentPosition + 2
GOTO GetOutOfSelect
END IF
IF (ContainsAnyOf(MID$(InputString$, iCurrentPosition + 1, 2), "ZO|ZI|ZA") OR (IsSlavoGermanic(InputString$) AND (iCurrentPosition > 1 AND NOT(EQUAL(MyMID$(InputString$, iCurrentPosition - 1, 1), "T"))))) THEN
tmpPrimary$ = CONCAT$(tmpPrimary$, "S")
tmpSecondary$ = CONCAT$(tmpSecondary$, "TS")
ELSE
tmpPrimary$ = CONCAT$(tmpPrimary$, "S")
tmpSecondary$ = CONCAT$(tmpSecondary$, "S")
END IF
IF EQUAL(MID$(InputString$, iCurrentPosition + 1, 1), "Z") THEN
iCurrentPosition = iCurrentPosition + 2
ELSE
iCurrentPosition = iCurrentPosition + 1
END IF
DEFAULT
iCurrentPosition = iCurrentPosition + 1
END SELECT
UNTIL iCurrentPosition > Last
WITH DMValue
.Primary$ = tmpPrimary$
.Secondary$ = tmpSecondary$
END WITH
IF NOT(EQUAL(tmpPrimary$, tmpSecondary$)) THEN
WITH DMValue
.HasAlternate = TRUE
END WITH
END IF
RETURN TRUE
END FUNCTION
' --------
SUB DM_TEST(STRING Word2Find$)
' --------
LOCAL i
PRINT
DOUBLEMETAPHONE(Word2Find$)
MyPrimary$ = DMValue.Primary$
MySecondary$ = DMValue.Secondary$
MyAlternate = DMValue.HasAlternate
' PRINT Word2Find$, " ", MyPrimary$, " ", MySecondary$, " Alt: ", MyAlternate
FOR i = 1 TO MaxWords
DOUBLEMETAPHONE(Words$[i])
' PRINT DMValue.BaseWord$," ", DMValue.Primary$," ", DMValue.Secondary$," Alt: ", DMValue.HasAlternate
PRINT i, ". ", q$, Word2Find$, q$;
IF EQUAL(MyPrimary$, DMValue.Primary$) OR EQUAL(MyPrimary$, DMValue.Secondary$) OR EQUAL(MySecondary$, DMValue.Primary$) OR EQUAL(MySecondary$, DMValue.Secondary$) THEN
PRINT " sounds like ";
ELSE
PRINT " does NOT sound like ";
END IF
PRINT q$, Words$[i], q$
NEXT i
PRINT
END SUB
' *****************
' END SUBS & FUNCS
' *****************
' *****************
' MAIN
' *****************
INIT_VARS
DM_TEST("Eerten")
END
' *****************
' END MAIN
' *****************