You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Zero/ZeroLevel/Services/Web/UrlUtility.cs

527 lines
17 KiB

using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
namespace ZeroLevel.Services.Web
{
public static class UrlUtility
{
// Query string parsing support
/// <summary>
/// Parses a URL query string into name/value pairs, decoding escapes as UTF-8.
/// </summary>
/// <param name="query">Query string, with or without a leading '?'.</param>
/// <returns>The result of the two-argument overload called with <see cref="Encoding.UTF8"/>.</returns>
public static IDictionary<string, string> ParseQueryString(string query)
    => ParseQueryString(query, Encoding.UTF8);
/// <summary>
/// Parses a URL query string into a dictionary of URL-decoded name/value pairs.
/// </summary>
/// <param name="query">Query string; a single leading '?' is removed before parsing.</param>
/// <param name="encoding">
/// Encoding used to turn percent-escaped bytes back into characters.
/// UTF-8 is used when this is null.
/// </param>
/// <returns>null when <paramref name="query"/> is null; otherwise the parsed pairs.</returns>
public static IDictionary<string, string> ParseQueryString(string query, Encoding encoding)
{
    if (query is null)
    {
        return null!;
    }

    // Fall back to UTF-8 (the default used by the single-argument overload and the
    // encoders in this class) instead of the platform-dependent Encoding.Default.
    var effectiveEncoding = encoding ?? Encoding.UTF8;

    var withoutMarker = query.Length > 0 && query[0] == '?'
        ? query.Substring(1)
        : query;

    return CreateCollection(withoutMarker, true, effectiveEncoding);
}
/// <summary>
/// URL-encodes a string using UTF-8.
/// </summary>
/// <param name="str">Text to encode; may be null.</param>
/// <returns>null when <paramref name="str"/> is null, otherwise the encoded text.</returns>
public static string UrlEncode(string str)
{
    if (str is null)
    {
        return null!;
    }

    return UrlEncode(str, Encoding.UTF8);
}
/// <summary>
/// Encodes the path portion of a URL and returns the result.
/// Everything from the first '?' onward is appended unchanged.
/// </summary>
/// <param name="str">URL or path to encode; may be null.</param>
/// <returns>null when <paramref name="str"/> is null, otherwise the encoded string.</returns>
public static string UrlPathEncode(string str)
{
    if (str is null)
    {
        return null!;
    }

    // Split at the first '?': only the part before it is encoded.
    var queryStart = str.IndexOf('?');
    var path = queryStart >= 0 ? str.Substring(0, queryStart) : str;
    var untouchedTail = queryStart >= 0 ? str.Substring(queryStart) : string.Empty;

    // Escape the bytes selected by UrlEncodeNonAscii first, then the spaces.
    var escapedPath = UrlEncodeSpaces(UrlEncodeNonAscii(path, Encoding.UTF8));
    return escapedPath + untouchedTail;
}
/// <summary>
/// URL-encodes a string: the text is converted to bytes with
/// <paramref name="encoding"/>, escaped, and returned as ASCII text.
/// </summary>
/// <param name="str">Text to encode; may be null.</param>
/// <param name="encoding">Encoding used to obtain the bytes that get escaped.</param>
/// <returns>null when <paramref name="str"/> is null, otherwise the encoded text.</returns>
public static string UrlEncode(string str, Encoding encoding)
{
    if (str is null)
    {
        return null!;
    }

    var escapedBytes = UrlEncodeToBytes(str, encoding);
    return Encoding.ASCII.GetString(escapedBytes);
}
/// <summary>
/// URL-encodes a string, writing characters above 0x7F in the "%uXXXX" form.
/// </summary>
/// <param name="str">Text to encode; may be null.</param>
/// <returns>null when <paramref name="str"/> is null, otherwise the encoded text.</returns>
public static string UrlEncodeUnicode(string str)
{
    if (str is null)
    {
        return null!;
    }

    return UrlEncodeUnicodeStringToStringInternal(str, false);
}
/// <summary>
/// Joins URL parts into one string, making sure exactly one separator
/// ('/', '?', '&amp;' or '#', depending on position) sits between adjacent parts.
/// The joined string is then passed through <see cref="EncodeIllegalCharacters"/>.
/// Adapted from https://github.com/tmenier/Flurl
/// </summary>
/// <param name="parts">URL parts to combine; null or empty entries are skipped.</param>
/// <returns>The combined URL.</returns>
/// <exception cref="ArgumentNullException"><paramref name="parts"/> is null.</exception>
public static string Combine(params string[] parts)
{
    if (parts == null!)
        throw new ArgumentNullException(nameof(parts));

    string result = "";
    bool inQuery = false, inFragment = false;

    // Joins a and b with exactly one separator, whatever each side already carries.
    string CombineEnsureSingleSeparator(string a, string b, char separator)
    {
        if (string.IsNullOrEmpty(a)) return b;
        if (string.IsNullOrEmpty(b)) return a;
        return a.TrimEnd(separator) + separator + b.TrimStart(separator);
    }

    foreach (var part in parts)
    {
        if (string.IsNullOrEmpty(part))
            continue;

        // URL delimiters are compared ordinally: the string overloads of
        // EndsWith/StartsWith without a StringComparison are culture-sensitive.
        if (result.EndsWith("?", StringComparison.Ordinal) || part.StartsWith("?", StringComparison.Ordinal))
            result = CombineEnsureSingleSeparator(result, part, '?');
        else if (result.EndsWith("#", StringComparison.Ordinal) || part.StartsWith("#", StringComparison.Ordinal))
            result = CombineEnsureSingleSeparator(result, part, '#');
        else if (inFragment)
            result += part;
        else if (inQuery)
            result = CombineEnsureSingleSeparator(result, part, '&');
        else
            result = CombineEnsureSingleSeparator(result, part, '/');

        // Track which URL section following parts belong to.
        if (part.IndexOf('#') >= 0)
        {
            inQuery = false;
            inFragment = true;
        }
        else if (!inFragment && part.IndexOf('?') >= 0)
        {
            inQuery = true;
        }
    }

    return EncodeIllegalCharacters(result);
}
/// <summary>
/// Percent-encodes only the characters that are illegal in a URL.
/// RFC 3986 unreserved characters (letters, digits, "-._~") and reserved
/// delimiters (":/?#[]@!$&amp;'()*+,;=") are kept as they are, and an existing
/// "%XX" escape is left alone so it is not double-encoded. Every other
/// character, including a '%' that does not start a valid escape, is written
/// as the upper-case %XX escapes of its UTF-8 bytes.
/// Adapted from https://github.com/tmenier/Flurl
/// </summary>
/// <param name="s">Text to encode; null or empty input is returned unchanged.</param>
/// <param name="encodeSpaceAsPlus">When true, spaces become '+' instead of "%20".</param>
/// <returns>The encoded text.</returns>
public static string EncodeIllegalCharacters(string s, bool encodeSpaceAsPlus = false)
{
    if (string.IsNullOrEmpty(s))
        return s;

    if (encodeSpaceAsPlus)
        s = s.Replace(" ", "+");

    // Uri.EscapeDataString must not be used here: it also escapes the reserved
    // delimiters, which would turn "http://host/a?b=c" into one opaque token
    // (and the '+' produced above into "%2B").
    const string legalPunctuation = "-._~:/?#[]@!$&'()*+,;=";
    const string hexDigits = "0123456789ABCDEF";

    bool IsHexDigit(char c) =>
        (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');

    bool IsLegal(char c) =>
        (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
        || legalPunctuation.IndexOf(c) >= 0;

    var encoded = new StringBuilder(s.Length);
    for (var i = 0; i < s.Length; i++)
    {
        var ch = s[i];

        // A valid %XX triplet is already encoded: copy it through untouched.
        if (ch == '%' && i + 2 < s.Length && IsHexDigit(s[i + 1]) && IsHexDigit(s[i + 2]))
        {
            encoded.Append(s, i, 3);
            i += 2;
            continue;
        }

        if (IsLegal(ch))
        {
            encoded.Append(ch);
            continue;
        }

        // Keep a surrogate pair together so it is encoded as one code point.
        var charCount = char.IsHighSurrogate(ch) && i + 1 < s.Length && char.IsLowSurrogate(s[i + 1]) ? 2 : 1;
        foreach (var b in Encoding.UTF8.GetBytes(s.Substring(i, charCount)))
        {
            encoded.Append('%').Append(hexDigits[b >> 4]).Append(hexDigits[b & 0x0f]);
        }
        i += charCount - 1;
    }

    return encoded.ToString();
}
/// <summary>
/// Encodes a string character by character. Characters above 0x7F become
/// "%uXXXX". For 7-bit characters: safe ones (or all of them when
/// <paramref name="ignoreAscii"/> is true) are copied, a space becomes '+',
/// and anything else becomes "%XX".
/// </summary>
/// <param name="s">Text to encode; must not be null.</param>
/// <param name="ignoreAscii">When true, every 7-bit character is copied unchanged.</param>
/// <returns>The encoded text.</returns>
private static string UrlEncodeUnicodeStringToStringInternal(string s, bool ignoreAscii)
{
    var encoded = new StringBuilder(s.Length);

    foreach (var current in s)
    {
        if (current > 0x7f)
        {
            // Outside the 7-bit range: four hex digits, most significant nibble first.
            encoded.Append("%u");
            for (var shift = 12; shift >= 0; shift -= 4)
            {
                encoded.Append(IntToHex((current >> shift) & 0xf));
            }
            continue;
        }

        if (ignoreAscii || IsSafe(current))
        {
            encoded.Append(current);
            continue;
        }

        if (current == ' ')
        {
            encoded.Append('+');
            continue;
        }

        encoded.Append('%');
        encoded.Append(IntToHex((current >> 4) & 0xf));
        encoded.Append(IntToHex(current & 0xf));
    }

    return encoded.ToString();
}
/// <summary>
/// Percent-escapes only the bytes that <c>IsNonAsciiByte</c> selects; every
/// other byte is kept as it is.
/// </summary>
/// <param name="str">Text to encode; null or empty input is returned unchanged.</param>
/// <param name="e">Encoding used to get the bytes; UTF-8 when null.</param>
/// <returns>The encoded text.</returns>
private static string UrlEncodeNonAscii(string str, Encoding e)
{
    if (string.IsNullOrEmpty(str))
    {
        return str;
    }

    var sourceBytes = (e ?? Encoding.UTF8).GetBytes(str);
    var escapedBytes = UrlEncodeBytesToBytesInternalNonAscii(sourceBytes, 0, sourceBytes.Length, false);
    return Encoding.ASCII.GetString(escapedBytes);
}
/// <summary>
/// Replaces every space with "%20" and leaves all other characters alone.
/// </summary>
/// <param name="str">Text to process; may be null.</param>
/// <returns>null for null input, otherwise the text with its spaces escaped.</returns>
private static string UrlEncodeSpaces(string str)
{
    if (str is null || str.IndexOf(' ') < 0)
    {
        // Nothing to replace.
        return str!;
    }

    return str.Replace(" ", "%20");
}
/// <summary>
/// URL-encodes a string and returns the result as bytes.
/// </summary>
/// <param name="str">Text to encode; may be null.</param>
/// <param name="e">
/// Encoding used to turn the text into bytes before escaping. UTF-8 is used
/// when null, matching <c>UrlEncodeNonAscii</c>, instead of failing with a
/// <see cref="NullReferenceException"/>.
/// </param>
/// <returns>null when <paramref name="str"/> is null, otherwise the escaped bytes.</returns>
public static byte[] UrlEncodeToBytes(string str, Encoding e)
{
    if (str is null)
    {
        return null!;
    }

    var bytes = (e ?? Encoding.UTF8).GetBytes(str);
    return UrlEncodeBytesToBytesInternal(bytes, 0, bytes.Length, false);
}
/// <summary>
/// Decodes a URL-encoded string.
/// </summary>
/// <param name="str">Encoded text; may be null.</param>
/// <param name="e">Encoding used to turn decoded bytes into characters.</param>
/// <returns>null when <paramref name="str"/> is null, otherwise the decoded text.</returns>
public static string UrlDecode(string str, Encoding e)
{
    if (str is null)
    {
        return null!;
    }

    return UrlDecodeStringFromStringInternal(str, e);
}
// Implementation for encoding
/// <summary>
/// URL-encodes <paramref name="count"/> bytes starting at <paramref name="offset"/>:
/// safe bytes are copied, a space becomes '+', and every other byte becomes "%XX".
/// </summary>
/// <param name="bytes">Source buffer.</param>
/// <param name="offset">Index of the first byte to encode.</param>
/// <param name="count">Number of bytes to encode.</param>
/// <param name="alwaysCreateReturnValue">
/// When false and nothing needs encoding, <paramref name="bytes"/> itself is returned.
/// </param>
/// <returns>The encoded bytes.</returns>
private static byte[] UrlEncodeBytesToBytesInternal(byte[] bytes, int offset, int count, bool alwaysCreateReturnValue)
{
    var end = offset + count;

    // First pass: find out how much the output grows.
    var spaceCount = 0;
    var escapeCount = 0;
    for (var index = offset; index < end; index++)
    {
        var candidate = (char)bytes[index];
        if (candidate == ' ')
        {
            spaceCount++;
        }
        else if (!IsSafe(candidate))
        {
            escapeCount++;
        }
    }

    var nothingToEncode = spaceCount == 0 && escapeCount == 0;
    if (nothingToEncode && !alwaysCreateReturnValue)
    {
        return bytes;
    }

    // Second pass: each escaped byte takes three output bytes, a space still takes one.
    var output = new byte[count + escapeCount * 2];
    var writeAt = 0;
    for (var index = offset; index < end; index++)
    {
        var current = bytes[index];
        var asChar = (char)current;

        if (IsSafe(asChar))
        {
            output[writeAt++] = current;
            continue;
        }

        if (asChar == ' ')
        {
            output[writeAt++] = (byte)'+';
            continue;
        }

        output[writeAt++] = (byte)'%';
        output[writeAt++] = (byte)IntToHex((current >> 4) & 0xf);
        output[writeAt++] = (byte)IntToHex(current & 0x0f);
    }

    return output;
}
/// <summary>
/// Returns true for bytes below 0x20 or at/above 0x7F, i.e. everything
/// outside the range 0x20..0x7E.
/// </summary>
private static bool IsNonAsciiByte(byte b) => b < 0x20 || b >= 0x7F;
/// <summary>
/// Escapes as "%XX" only the bytes that <c>IsNonAsciiByte</c> selects, within
/// <paramref name="count"/> bytes starting at <paramref name="offset"/>;
/// all other bytes (spaces included) are copied unchanged.
/// </summary>
/// <param name="bytes">Source buffer.</param>
/// <param name="offset">Index of the first byte to process.</param>
/// <param name="count">Number of bytes to process.</param>
/// <param name="alwaysCreateReturnValue">
/// When false and nothing needs escaping, <paramref name="bytes"/> itself is returned.
/// </param>
/// <returns>The escaped bytes.</returns>
private static byte[] UrlEncodeBytesToBytesInternalNonAscii(byte[] bytes, int offset, int count, bool alwaysCreateReturnValue)
{
    var end = offset + count;

    // First pass: count the bytes that will expand.
    var escapeCount = 0;
    for (var index = offset; index < end; index++)
    {
        if (IsNonAsciiByte(bytes[index]))
        {
            escapeCount++;
        }
    }

    if (escapeCount == 0 && !alwaysCreateReturnValue)
    {
        return bytes;
    }

    // Second pass: every escaped byte grows from one output byte to three.
    var output = new byte[count + escapeCount * 2];
    var writeAt = 0;
    for (var index = offset; index < end; index++)
    {
        var current = bytes[index];
        if (!IsNonAsciiByte(current))
        {
            output[writeAt++] = current;
            continue;
        }

        output[writeAt++] = (byte)'%';
        output[writeAt++] = (byte)IntToHex((current >> 4) & 0xf);
        output[writeAt++] = (byte)IntToHex(current & 0x0f);
    }

    return output;
}
/// <summary>
/// Decodes a URL-encoded string: '+' becomes a space, "%XX" becomes a byte
/// and "%uXXXX" becomes a UTF-16 code unit. Bytes are converted to
/// characters with <paramref name="e"/>.
/// A '%' that does not start a well-formed escape is kept literally.
/// </summary>
/// <param name="s">Encoded text; must not be null.</param>
/// <param name="e">Encoding for the decoded bytes; UTF-8 is used when null.</param>
/// <returns>The decoded text.</returns>
private static string UrlDecodeStringFromStringInternal(string s, Encoding e)
{
    var count = s.Length;
    var helper = new UrlDecoder(count, e ?? Encoding.UTF8);

    // Walk the input collapsing %XX and %uXXXX. Decoded %XX values are
    // collected as bytes so multi-byte sequences can be decoded together.
    for (var pos = 0; pos < count; pos++)
    {
        var ch = s[pos];
        if (ch == '+')
        {
            ch = ' ';
        }
        else if (ch == '%' && pos < count - 2)
        {
            if (s[pos + 1] == 'u' && pos < count - 5)
            {
                var h1 = HexToInt(s[pos + 2]);
                var h2 = HexToInt(s[pos + 3]);
                var h3 = HexToInt(s[pos + 4]);
                var h4 = HexToInt(s[pos + 5]);
                if (h1 >= 0 && h2 >= 0 && h3 >= 0 && h4 >= 0)
                {
                    // Four valid hex digits: add the value as a char.
                    helper.AddChar((char)((h1 << 12) | (h2 << 8) | (h3 << 4) | h4));
                    pos += 5;
                    continue;
                }
            }
            else
            {
                var h1 = HexToInt(s[pos + 1]);
                var h2 = HexToInt(s[pos + 2]);
                if (h1 >= 0 && h2 >= 0)
                {
                    // Two valid hex digits: add the value as a byte, not a char.
                    helper.AddByte((byte)((h1 << 4) | h2));
                    pos += 2;
                    continue;
                }
            }
            // Malformed escape: fall through so the '%' is emitted as-is
            // instead of being silently dropped.
        }

        if ((ch & 0xFF80) == 0)
        {
            helper.AddByte((byte)ch); // 7-bit chars go in as bytes so they stay in sequence with %XX bytes
        }
        else
        {
            helper.AddChar(ch);
        }
    }

    return helper.GetString();
}
// Private helpers for URL encoding/decoding
/// <summary>
/// Converts one hexadecimal digit (either case) to its value 0-15.
/// </summary>
/// <returns>The digit's value, or -1 when <paramref name="h"/> is not a hex digit.</returns>
private static int HexToInt(char h)
{
    if (h >= '0' && h <= '9')
    {
        return h - '0';
    }

    if (h >= 'a' && h <= 'f')
    {
        return h - 'a' + 10;
    }

    if (h >= 'A' && h <= 'F')
    {
        return h - 'A' + 10;
    }

    return -1;
}
/// <summary>
/// Converts a value to a hexadecimal digit: '0'-'9' for values up to 9,
/// lower-case letters starting at 'a' for 10 and above.
/// </summary>
private static char IntToHex(int n)
{
    // Digits are offset from '0'; letters are offset so that 10 maps to 'a'.
    int zeroPoint = n <= 9 ? '0' : 'a' - 10;
    return (char)(n + zeroPoint);
}
/// <summary>
/// Tells whether a character may appear unescaped in encoded output:
/// ASCII letters, ASCII digits and the punctuation <c>- _ . ! * ' ( )</c>
/// (the RFC 1738 safe set minus '+').
/// </summary>
internal static bool IsSafe(char ch)
{
    var isAsciiLetter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
    var isAsciiDigit = ch >= '0' && ch <= '9';
    if (isAsciiLetter || isAsciiDigit)
    {
        return true;
    }

    return ch == '-' || ch == '_' || ch == '.' || ch == '!'
        || ch == '*' || ch == '\'' || ch == '(' || ch == ')';
}
/// <summary>
/// Helper for URL decoding. It keeps a char buffer and a byte buffer and
/// accepts either chars or bytes; pending bytes are converted to chars with
/// the given encoding before a char is appended or the result is read.
/// </summary>
private class UrlDecoder
{
    // Capacity used for both buffers.
    private readonly int _bufferSize;
    // Converts pending bytes into characters.
    private readonly Encoding _encoding;
    // Decoded characters collected so far.
    private readonly char[] _charBuffer;
    private int _numChars;
    // Bytes waiting to be converted; allocated on first use.
    private byte[] _byteBuffer;
    private int _numBytes;

    internal UrlDecoder(int bufferSize, Encoding encoding)
    {
        _bufferSize = bufferSize;
        _encoding = encoding;
        _charBuffer = new char[bufferSize];
    }

    /// <summary>Queues one byte for later conversion to characters.</summary>
    internal void AddByte(byte b)
    {
        if (_byteBuffer is null)
        {
            _byteBuffer = new byte[_bufferSize];
        }

        _byteBuffer[_numBytes] = b;
        _numBytes++;
    }

    /// <summary>Appends one character after converting any pending bytes.</summary>
    internal void AddChar(char ch)
    {
        FlushBytes();
        _charBuffer[_numChars] = ch;
        _numChars++;
    }

    /// <summary>Returns everything collected so far as a string.</summary>
    internal string GetString()
    {
        FlushBytes();
        if (_numChars <= 0)
        {
            return string.Empty;
        }

        return new string(_charBuffer, 0, _numChars);
    }

    // Converts pending bytes into chars at the end of the char buffer; no-op when none are pending.
    private void FlushBytes()
    {
        if (_numBytes > 0)
        {
            var produced = _encoding.GetChars(_byteBuffer, 0, _numBytes, _charBuffer, _numChars);
            _numChars += produced;
            _numBytes = 0;
        }
    }
}
/// <summary>
/// Splits "name=value&amp;name=value" text into a dictionary.
/// Names are lower-cased with the invariant culture. A segment without '='
/// is stored as a name with a null value. When a name occurs more than
/// once, its non-null values are joined with ',' (the convention used by
/// <c>System.Web.HttpUtility.ParseQueryString</c>) instead of throwing.
/// </summary>
/// <param name="s">Query text without the leading '?'.</param>
/// <param name="urlencoded">When true, names and values are URL-decoded.</param>
/// <param name="encoding">Encoding passed to <c>UrlDecode</c>.</param>
/// <returns>The parsed pairs; empty when <paramref name="s"/> is null or whitespace.</returns>
private static IDictionary<string, string> CreateCollection(string s, bool urlencoded, Encoding encoding)
{
    var result = new Dictionary<string, string>();
    if (string.IsNullOrWhiteSpace(s))
        return result;

    var segmentStart = 0;
    while (segmentStart < s.Length)
    {
        // A segment runs up to the next '&' (or the end of the text).
        var segmentEnd = s.IndexOf('&', segmentStart);
        if (segmentEnd < 0)
            segmentEnd = s.Length;

        // Only the first '=' inside the segment separates name from value.
        var equalsAt = s.IndexOf('=', segmentStart, segmentEnd - segmentStart);

        string name;
        string value = null!;
        if (equalsAt >= 0)
        {
            name = s.Substring(segmentStart, equalsAt - segmentStart);
            value = s.Substring(equalsAt + 1, segmentEnd - equalsAt - 1);
        }
        else
        {
            name = s.Substring(segmentStart, segmentEnd - segmentStart);
        }

        if (urlencoded)
        {
            name = UrlDecode(name, encoding);
            value = UrlDecode(value, encoding);
        }

        // Invariant lower-casing keeps keys stable regardless of the current culture.
        var key = name.ToLowerInvariant();

        // Repeated names are legal in query strings: merge instead of throwing.
        if (result.TryGetValue(key, out var existing) && existing != null)
        {
            value = value == null ? existing : existing + "," + value;
        }
        result[key] = value;

        segmentStart = segmentEnd + 1;
    }

    return result;
}
}
}

Powered by TurnKey Linux.