8 using System.Collections.Generic;
14 public static class FuzzyText
24 public static double Levenshtein(
string input,
string comparedTo,
bool caseSensitive =
false)
26 if (
string.IsNullOrEmpty(input) ||
string.IsNullOrEmpty(comparedTo))
return 999;
29 input = input.ToLower();
30 comparedTo = comparedTo.ToLower();
32 int inputLen = input.Length;
33 int comparedToLen = comparedTo.Length;
35 int[,] matrix =
new int[inputLen, comparedToLen];
38 for (
int i = 0; i < inputLen; i++) matrix[i, 0] = i;
39 for (
int i = 0; i < comparedToLen; i++) matrix[0, i] = i;
42 for (
int i = 1; i < inputLen; i++)
44 var si = input[i - 1];
45 for (
int j = 1; j < comparedToLen; j++)
47 var tj = comparedTo[j - 1];
48 int cost = (si == tj) ? 0 : 1;
50 var above = matrix[i - 1, j];
51 var left = matrix[i, j - 1];
52 var diag = matrix[i - 1, j - 1];
53 var cell = FindMinimum(above + 1, left + 1, diag + cost);
58 var trans = matrix[i - 2, j - 2] + 1;
59 if (input[i - 2] != comparedTo[j - 1]) trans++;
60 if (input[i - 1] != comparedTo[j - 2]) trans++;
61 if (cell > trans) cell = trans;
66 return (
double)1/(1+matrix[inputLen - 1, comparedToLen - 1]);
69 private static int FindMinimum(params
int[] p)
71 if (null == p)
return int.MinValue;
72 int min =
int.MaxValue;
73 for (
int i = 0; i < p.Length; i++)
75 if (min > p[i]) min = p[i];
89 public static double DiceCoefficient(
string input,
string comparedTo)
91 var ngrams = ToBiGrams(input);
92 var compareToNgrams = ToBiGrams(comparedTo);
93 return DiceCoefficient(ngrams,compareToNgrams);
102 private static double DiceCoefficient(
string[] nGrams,
string[] compareToNGrams)
105 foreach (var nGram
in nGrams)
107 foreach (
string x
in compareToNGrams)
116 if (matches == 0)
return 0.0d;
117 double totalBigrams = nGrams.Length + compareToNGrams.Length;
118 return (2 * matches) / totalBigrams;
121 private static string[] ToBiGrams(
string input)
126 input = SinglePercent + input + SinglePound;
127 return ToNGrams(input, 2);
130 private static string[] ToTriGrams(
string input)
135 input = DoublePercent + input + DoublePount;
136 return ToNGrams(input, 3);
139 private static string[] ToNGrams(
string input,
int nLength)
141 int itemsCount = input.Length - 1;
142 string[] ngrams =
new string[input.Length - 1];
143 for (
int i = 0; i < itemsCount; i++) ngrams[i] = input.Substring(i, nLength);
147 private const string SinglePercent =
"%";
148 private const string SinglePound =
"#";
149 private const string DoublePercent =
"&&";
150 private const string DoublePount =
"##";