Brute-force address normalization for grouping. Not an exact science, but it works fairly well for addresses in the US. Use at your own risk.
// Matches runs of characters that are not word characters, whitespace, or "#".
// "#" is kept so unit numbers written as "#12" can still be recognized and dropped.
// Held in a static field: building a RegexOptions.Compiled regex on every call is expensive.
private static readonly Regex PunctuationPattern = new Regex(@"[^\w\s#]+", RegexOptions.Compiled);

// Matches a "#" followed by whitespace, e.g. the "# " in "# 12".
private static readonly Regex OrphanedHashPattern = new Regex(@"#(\s)+", RegexOptions.Compiled);

// Subpremise designators. The designator is dropped, and so is the token after it when that token is short.
// NOTE(review): "route"/"rte" are listed here, so the "route" entry in StreetAbbreviations is never reached.
private static readonly HashSet<string> SubpremiseDesignators = new HashSet<string>(StringComparer.Ordinal)
{
    "suite", "ste", "apt", "rm", "room", "bldg", "building", "route", "rte",
    "pavillion", "department", "dept", "unit"
};

// Tokens that mark a floor; the floor number normally precedes them ("2nd floor").
private static readonly HashSet<string> FloorDesignators = new HashSet<string>(StringComparer.Ordinal)
{
    "floor", "fl", "flr"
};

// Long-form street words and the abbreviation each is replaced with.
// "northeast" is included so it collapses to "ne" like the other compound directionals.
// NOTE(review): the "floor"/"fl" entries are unreachable while floor designators are skipped before this lookup.
private static readonly Dictionary<string, string> StreetAbbreviations = new Dictionary<string, string>(StringComparer.Ordinal)
{
    {"boulevard", "blvd"},
    {"circle", "cir"},
    {"court", "ct"},
    {"drive", "dr"},
    {"east", "e"},
    {"floor", "flr"},
    {"fl", "flr"},
    {"north", "n"},
    {"northeast", "ne"},
    {"northwest", "nw"},
    {"place", "pl"},
    {"road", "rd"},
    {"route", "rte"},
    {"south", "s"},
    {"southeast", "se"},
    {"southwest", "sw"},
    {"sth", "s"},
    {"street", "st"},
    {"way", "wy"},
    {"west", "w"},
};

// Abbreviated direction indicators that are dropped when they appear after the third token.
private static readonly HashSet<string> Directionals = new HashSet<string>(StringComparer.Ordinal)
{
    "n", "ne", "nw", "e", "s", "se", "sw", "w"
};

/// <summary>
/// Builds a normalized key from an address so that differently formatted
/// spellings of the same street address can be grouped together.
/// The street line is lower-cased, stripped of punctuation, split into tokens,
/// and rebuilt without unit numbers, subpremise designators, floor references,
/// and trailing direction indicators; common street words are abbreviated.
/// </summary>
/// <param name="addressline">The street address line. <c>null</c> is treated as an empty string.</param>
/// <param name="cityName">The city name, appended after the street portion. <c>null</c> is treated as an empty string.</param>
/// <param name="stateCode">The state code, appended after the city. <c>null</c> is treated as an empty string.</param>
/// <param name="postalCode">The postal code; only its first five characters are used. <c>null</c> is treated as an empty string.</param>
/// <returns>
/// The normalized street portion followed by city, state, and postal code,
/// lower-cased, with whitespace and all punctuation other than "#" removed.
/// </returns>
internal string ParsedStreetAddress(string addressline, string cityName, string stateCode, string postalCode)
{
    addressline = addressline ?? "";
    cityName = cityName ?? "";
    stateCode = stateCode ?? "";
    postalCode = postalCode ?? "";

    // Lower-case to simplify comparisons, then strip punctuation (keeping "#").
    addressline = PunctuationPattern.Replace(addressline.ToLowerInvariant(), "");

    // Re-attach an orphaned "#" to the token that follows it ("# 12" -> "#12").
    // Replacing with "" instead would delete the hash and let the unit number
    // through as an ordinary token, so "# 12" and "#12" would produce different keys.
    addressline = OrphanedHashPattern.Replace(addressline, "#");

    // A null separator splits on any whitespace, not just the space character.
    var segments = addressline.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

    var normalizedAddress = "";
    for (var i = 0; i < segments.Length; i++)
    {
        var segment = segments[i];

        // "#12"-style unit numbers are dropped.
        // NOTE(review): the skip is inferred from the original "a unit number" comment — confirm.
        if (segment.StartsWith("#", StringComparison.Ordinal))
        {
            continue;
        }

        // Named subpremise ("apt", "suite", ...): drop the designator.
        if (SubpremiseDesignators.Contains(segment))
        {
            // The next token should be the subpremise number. Skip it only when it is
            // short: too permissive and valid data is skipped, too strict and unit
            // numbers end up in the key.
            if (i + 1 < segments.Length && segments[i + 1].Length < 5)
            {
                i++;
            }

            // NOTE(review): dropping the designator even when the next token is kept is inferred — confirm.
            continue;
        }

        // Floors: the typical format is "... 2ND FLOOR ...", so the floor number is the
        // previous token and has to be removed from what has been built so far.
        if (FloorDesignators.Contains(segment))
        {
            if (i != 0)
            {
                // RemoveFromEnd is a helper defined elsewhere; presumably it strips the
                // given suffix when present — verify against its definition.
                normalizedAddress = normalizedAddress.RemoveFromEnd(segments[i - 1]);
            }

            // NOTE(review): dropping the floor token itself is inferred — confirm.
            continue;
        }

        // Abbreviate common street words.
        string abbreviation;
        if (StreetAbbreviations.TryGetValue(segment, out abbreviation))
        {
            segment = abbreviation;
        }

        // A direction indicator after the third token is most likely not part of the
        // street name, so it is dropped.
        if (i > 2 && Directionals.Contains(segment))
        {
            continue;
        }

        normalizedAddress += segment;
    }

    // Keep only the first five characters of the postal code (drops a ZIP+4 suffix).
    if (postalCode.Length > 5)
    {
        postalCode = postalCode.Substring(0, 5);
    }

    // City, state, and postal code are lower-cased along with the street portion so the
    // key does not depend on input casing.
    // NOTE(review): confirm no consumer relies on the caller's casing of city/state.
    var key = string.Concat(normalizedAddress, cityName, stateCode, postalCode).ToLowerInvariant();

    return new string(PunctuationPattern.Replace(key, "")
        .Where(c => !char.IsWhiteSpace(c))
        .ToArray());
}
