diff --git a/ZeroLevel/Services/Web/UrlUtility.cs b/ZeroLevel/Services/Web/UrlUtility.cs index 82e760d..b92d89f 100644 --- a/ZeroLevel/Services/Web/UrlUtility.cs +++ b/ZeroLevel/Services/Web/UrlUtility.cs @@ -1,5 +1,7 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Text; +using System.Text.RegularExpressions; namespace ZeroLevel.Services.Web { @@ -60,6 +62,78 @@ namespace ZeroLevel.Services.Web return str == null ? null : UrlEncodeUnicodeStringToStringInternal(str, false); } + /// + /// https://github.com/tmenier/Flurl + /// + public static string Combine(params string[] parts) + { + if (parts == null) + throw new ArgumentNullException(nameof(parts)); + + string result = ""; + bool inQuery = false, inFragment = false; + + string CombineEnsureSingleSeparator(string a, string b, char separator) + { + if (string.IsNullOrEmpty(a)) return b; + if (string.IsNullOrEmpty(b)) return a; + return a.TrimEnd(separator) + separator + b.TrimStart(separator); + } + + foreach (var part in parts) + { + if (string.IsNullOrEmpty(part)) + continue; + + if (result.EndsWith("?") || part.StartsWith("?")) + result = CombineEnsureSingleSeparator(result, part, '?'); + else if (result.EndsWith("#") || part.StartsWith("#")) + result = CombineEnsureSingleSeparator(result, part, '#'); + else if (inFragment) + result += part; + else if (inQuery) + result = CombineEnsureSingleSeparator(result, part, '&'); + else + result = CombineEnsureSingleSeparator(result, part, '/'); + + if (part.Contains("#")) + { + inQuery = false; + inFragment = true; + } + else if (!inFragment && part.Contains("?")) + { + inQuery = true; + } + } + return EncodeIllegalCharacters(result); + } + /// + /// https://github.com/tmenier/Flurl + /// + public static string EncodeIllegalCharacters(string s, bool encodeSpaceAsPlus = false) + { + if (string.IsNullOrEmpty(s)) + return s; + + if (encodeSpaceAsPlus) + s = s.Replace(" ", "+"); + + // Uri.EscapeUriString mostly does what we want - encodes illegal characters only - but it has a quirk + // in that % isn't illegal if it's the start of a %-encoded sequence https://stackoverflow.com/a/47636037/62600 + + // no % characters, so avoid the regex overhead + if (!s.Contains("%")) + return Uri.EscapeUriString(s); + + // pick out all %-hex-hex matches and avoid double-encoding + return Regex.Replace(s, "(.*?)((%[0-9A-Fa-f]{2})|$)", c => { + var a = c.Groups[1].Value; // group 1 is a sequence with no %-encoding - encode illegal characters + var b = c.Groups[2].Value; // group 2 is a valid 3-character %-encoded sequence - leave it alone! + return Uri.EscapeUriString(a) + b; + }); + } + private static string UrlEncodeUnicodeStringToStringInternal(string s, bool ignoreAscii) { var l = s.Length;