7 using System.Collections.Generic;
/// <summary>
/// Extension methods that score the similarity of two strings with the
/// Dice coefficient computed over their character bigrams.
/// </summary>
public static class DiceCoefficientExtensions
{
    /// <summary>
    /// Computes the Dice coefficient between <paramref name="input"/> and
    /// <paramref name="comparedTo"/> using padded character bigrams.
    /// </summary>
    /// <param name="input">The first string. A null value behaves like an empty string,
    /// because the padding is applied by string concatenation.</param>
    /// <param name="comparedTo">The string to compare against; null behaves like an empty string.</param>
    /// <returns>
    /// <c>2 * matches / (bigrams(input) + bigrams(comparedTo))</c>, a value between
    /// 0.0 (no shared bigram) and 1.0 (identical bigram multisets).
    /// </returns>
    public static double DiceCoefficient(this string input, string comparedTo)
    {
        var ngrams = input.ToBiGrams();
        var compareToNgrams = comparedTo.ToBiGrams();
        return ngrams.DiceCoefficient(compareToNgrams);
    }

    /// <summary>
    /// Computes the Dice coefficient of two n-gram arrays.
    /// </summary>
    /// <param name="nGrams">The n-grams of the first string.</param>
    /// <param name="compareToNGrams">The n-grams of the second string.</param>
    /// <returns>Twice the number of shared n-grams divided by the total number of n-grams,
    /// or 0.0 when nothing is shared.</returns>
    private static double DiceCoefficient(this string[] nGrams, string[] compareToNGrams)
    {
        // Count how often each n-gram occurs on the compared side. The default
        // string comparer is ordinal, which matches the semantics of '=='.
        var remaining = new Dictionary<string, int>(compareToNGrams.Length);
        foreach (var nGram in compareToNGrams)
        {
            int seen;
            remaining.TryGetValue(nGram, out seen);
            remaining[nGram] = seen + 1;
        }

        // Multiset intersection: every occurrence on the compared side can be
        // matched only once. Without this, repeated n-grams are over-counted,
        // which makes the score asymmetric and lets it exceed 1.0.
        int matches = 0;
        foreach (var nGram in nGrams)
        {
            int available;
            if (remaining.TryGetValue(nGram, out available) && available > 0)
            {
                remaining[nGram] = available - 1;
                matches++;
            }
        }

        // Also covers the case where both arrays are empty (avoids 0 / 0).
        if (matches == 0)
        {
            return 0.0d;
        }

        double totalBigrams = nGrams.Length + compareToNGrams.Length;
        return (2 * matches) / totalBigrams;
    }

    /// <summary>
    /// Splits <paramref name="input"/> into character bigrams after wrapping it in a
    /// one-character start marker and a one-character end marker.
    /// </summary>
    /// <param name="input">The string to split.</param>
    /// <returns>The bigrams of the padded string, in order of appearance.</returns>
    private static string[] ToBiGrams(this string input)
    {
        // The markers let the first and last character take part in two bigrams each.
        input = SinglePercent + input + SinglePound;
        return ToNGrams(input, 2);
    }

    /// <summary>
    /// Splits <paramref name="input"/> into character trigrams after wrapping it in a
    /// two-character start marker and a two-character end marker.
    /// </summary>
    /// <param name="input">The string to split.</param>
    /// <returns>The trigrams of the padded string, in order of appearance.</returns>
    private static string[] ToTriGrams(this string input)
    {
        input = DoublePercent + input + DoublePound;
        return ToNGrams(input, 3);
    }

    /// <summary>
    /// Returns every contiguous substring of length <paramref name="nLength"/> in
    /// <paramref name="input"/>, in order of appearance.
    /// </summary>
    /// <param name="input">The (already padded) string to split.</param>
    /// <param name="nLength">The length of each n-gram.</param>
    /// <returns>An array of <c>input.Length - nLength + 1</c> n-grams, or an empty array
    /// when <paramref name="input"/> is shorter than <paramref name="nLength"/>.</returns>
    private static string[] ToNGrams(string input, int nLength)
    {
        // A string of length L contains L - n + 1 windows of length n. Using L - 1
        // is only right for n == 2 and makes Substring run past the end for n == 3.
        int itemsCount = input.Length - nLength + 1;
        if (itemsCount <= 0)
        {
            return new string[0];
        }

        string[] ngrams = new string[itemsCount];
        for (int i = 0; i < itemsCount; i++)
        {
            ngrams[i] = input.Substring(i, nLength);
        }

        return ngrams;
    }

    // Start / end markers used to pad a string before it is split into n-grams.
    private const string SinglePercent = "%";
    private const string SinglePound = "#";
    private const string DoublePercent = "%%";
    private const string DoublePound = "##";
}