|
|
using System.Collections.Generic;
|
|
|
using System.Text;
|
|
|
using ZeroLevel.Services.Text;
|
|
|
|
|
|
namespace ZeroLevel.Services.Web
|
|
|
{
|
|
|
public static class HtmlUtility
|
|
|
{
|
|
|
#region Helpers
|
|
|
/// <summary>
/// One character of a source string, linked to the characters before and
/// after it so a parser can look around without tracking indexes itself.
/// </summary>
private class SymToken
{
    /// <summary>
    /// Character at this position, or <c>char.MinValue</c> when the position
    /// lies outside the line.
    /// </summary>
    public readonly char Value;

    /// <summary>
    /// Token this one was created from (the previous character), or null
    /// for a token that was constructed directly.
    /// </summary>
    public readonly SymToken Preview;

    private SymToken _lazyNext = null;
    private readonly int _index;
    private readonly string _line;

    /// <summary>
    /// Token for the following character, created on first access and then
    /// cached. Null when this token is the last character of the line.
    /// </summary>
    public SymToken Next
    {
        get
        {
            // ">=" instead of "==": a token positioned past the end of the
            // line must also end the chain rather than produce empty tokens
            // forever. This also covers the empty-line case (Length - 1 == -1).
            if (_index >= _line.Length - 1)
            {
                return null;
            }
            if (_lazyNext == null)
            {
                _lazyNext = new SymToken(_line, _index + 1, this);
            }
            return _lazyNext;
        }
    }

    /// <summary>Creates the token for position <paramref name="index"/> of <paramref name="line"/>.</summary>
    /// <param name="line">Source text; null is treated as an empty string.</param>
    /// <param name="index">Zero-based position of this token in the text.</param>
    /// <param name="preview">Token for the preceding character, or null.</param>
    public SymToken(string line, int index, SymToken preview)
    {
        _index = index;
        _line = line ?? string.Empty;
        if (_index >= 0 && _index < _line.Length)
        {
            Value = _line[_index];
        }
        else
        {
            Value = char.MinValue;
        }
        Preview = preview;
    }

    /// <summary>
    /// Checks whether the text starting at this token begins with
    /// <paramref name="testLine"/>.
    /// </summary>
    /// <remarks>
    /// The comparison is exact (case-sensitive): HTML entity names such as
    /// "Agrave;" and "agrave;" denote different characters, so folding the
    /// case of only one side would make one of them match the other.
    /// </remarks>
    /// <returns>
    /// True when every character matches; false on the first mismatch or
    /// when the source text ends before <paramref name="testLine"/> does.
    /// </returns>
    public bool Test(string testLine)
    {
        var cursor = this;
        for (int i = 0; i < testLine.Length; i++)
        {
            // cursor becomes null when the source text runs out mid-match.
            if (cursor == null || cursor.Value != testLine[i]) return false;
            cursor = cursor.Next;
        }
        return true;
    }
}

/// <summary>
/// Forward-only reading position over a string, built on a chain of
/// <see cref="SymToken"/> instances. <see cref="Token"/> is null once the
/// end of the text has been passed (or when the text was null or empty).
/// </summary>
private class StringCursor
{
    /// <summary>Token at the current position; null at end of input.</summary>
    public SymToken Token;

    /// <summary>Positions the cursor on the first character of <paramref name="line"/>.</summary>
    public StringCursor(string line)
    {
        if (string.IsNullOrEmpty(line))
        {
            Token = null;
        }
        else
        {
            Token = new SymToken(line, 0, null);
        }
    }

    /// <summary>Previous character, or <c>char.MinValue</c> when there is none.</summary>
    public char Preview
    {
        get
        {
            if (Token != null && Token.Preview != null)
            {
                return Token.Preview.Value;
            }
            return char.MinValue;
        }
    }

    /// <summary>Current character, or <c>char.MinValue</c> at end of input.</summary>
    public char Current
    {
        get
        {
            if (Token != null)
            {
                return Token.Value;
            }
            return char.MinValue;
        }
    }

    /// <summary>Following character, or <c>char.MinValue</c> when there is none.</summary>
    public char Next
    {
        get
        {
            var following = Token != null ? Token.Next : null;
            if (following != null)
            {
                return following.Value;
            }
            return char.MinValue;
        }
    }

    /// <summary>
    /// True when the text at the current position begins with
    /// <paramref name="testLine"/> (exact, case-sensitive match).
    /// Always false at end of input.
    /// </summary>
    public bool Test(string testLine)
    {
        return Token != null && Token.Test(testLine);
    }

    /// <summary>
    /// Steps over <paramref name="testLine"/>: for each of its characters that
    /// equals the current character, the cursor advances by one. Intended to
    /// be called after a successful <see cref="Test"/>, in which case the
    /// cursor ends up just past the matched text.
    /// </summary>
    public void MoveAfter(string testLine)
    {
        // Stop as soon as the input is exhausted instead of dereferencing null.
        for (int i = 0; i < testLine.Length && Token != null; i++)
        {
            if (testLine[i] == Token.Value)
            {
                Token = Token.Next;
            }
        }
    }

    /// <summary>
    /// Advances by one character.
    /// </summary>
    /// <returns>
    /// False only when the cursor was already at end of input. Note that a
    /// true result may leave the cursor at end of input; check <see cref="EOF"/>.
    /// </returns>
    public bool MoveNext()
    {
        if (Token != null)
        {
            Token = Token.Next;
            return true;
        }
        return false;
    }

    /// <summary>True once the cursor has moved past the last character.</summary>
    public bool EOF
    {
        get
        {
            return Token == null;
        }
    }
}
|
|
|
#endregion
|
|
|
|
|
|
#region Mapping
|
|
|
/// <summary>
/// Named character references recognised by DecodeHtmlEntities. Each key is
/// the reference name without the leading ampersand but including the
/// closing ';'; each value is the text the reference decodes to.
/// EncodeHtmlEntities also consults the keys to detect text that is
/// already an entity.
/// </summary>
private static readonly Dictionary<string, string> _entityMap = new Dictionary<string, string>
{
    {"quot;", "\""}, {"apos;", "'"}, {"amp;", "&"}, {"lt;", "<"},
    // nbsp and shy are written as escapes: the raw characters are invisible
    // and easily lost, which would make the reference decode to nothing.
    {"gt;", ">"}, {"nbsp;", "\u00A0"}, {"iexcl;", "¡"}, {"cent;", "¢"},
    {"pound;", "£"}, {"curren;", "¤"}, {"yen;", "¥"}, {"brvbar;", "¦"},
    {"sect;", "§"}, {"uml;", "¨"}, {"copy;", "©"}, {"ordf;", "ª"},
    {"laquo;", "«"}, {"not;", "¬"}, {"shy;", "\u00AD"}, {"reg;", "®"},
    {"macr;", "¯"}, {"deg;", "°"}, {"plusmn;", "±"}, {"sup2;", "²"},
    {"sup3;", "³"}, {"acute;", "´"}, {"micro;", "µ"}, {"para;", "¶"},
    {"middot;", "·"}, {"cedil;", "¸"}, {"sup1;", "¹"}, {"ordm;", "º"},
    {"raquo;", "»"}, {"frac14;", "¼"}, {"frac12;", "½"}, {"frac34;", "¾"},
    {"iquest;", "¿"}, {"times;", "×"}, {"divide;", "÷"}, {"Agrave;", "À"},
    {"Aacute;", "Á"}, {"Acirc;", "Â"}, {"Atilde;", "Ã"}, {"Auml;", "Ä"},
    {"Aring;", "Å"}, {"AElig;", "Æ"}, {"Ccedil;", "Ç"}, {"Egrave;", "È"},
    {"Eacute;", "É"}, {"Ecirc;", "Ê"}, {"Euml;", "Ë"}, {"Igrave;", "Ì"},
    {"Iacute;", "Í"}, {"Icirc;", "Î"}, {"Iuml;", "Ï"}, {"ETH;", "Ð"},
    {"Ntilde;", "Ñ"}, {"Ograve;", "Ò"}, {"Oacute;", "Ó"}, {"Ocirc;", "Ô"},
    {"Otilde;", "Õ"}, {"Ouml;", "Ö"}, {"Oslash;", "Ø"}, {"Ugrave;", "Ù"},
    {"Uacute;", "Ú"}, {"Ucirc;", "Û"}, {"Uuml;", "Ü"}, {"Yacute;", "Ý"},
    {"THORN;", "Þ"}, {"szlig;", "ß"}, {"agrave;", "à"}, {"aacute;", "á"},
    {"acirc;", "â"}, {"atilde;", "ã"}, {"auml;", "ä"}, {"aring;", "å"},
    {"aelig;", "æ"}, {"ccedil;", "ç"}, {"egrave;", "è"}, {"eacute;", "é"},
    {"ecirc;", "ê"}, {"euml;", "ë"}, {"igrave;", "ì"}, {"iacute;", "í"},
    {"icirc;", "î"}, {"iuml;", "ï"}, {"eth;", "ð"}, {"ntilde;", "ñ"},
    {"ograve;", "ò"}, {"oacute;", "ó"}, {"ocirc;", "ô"}, {"otilde;", "õ"},
    {"ouml;", "ö"}, {"oslash;", "ø"}, {"ugrave;", "ù"}, {"uacute;", "ú"},
    {"ucirc;", "û"}, {"uuml;", "ü"}, {"yacute;", "ý"}, {"thorn;", "þ"},
    {"yuml;", "ÿ"}
};
|
|
|
|
|
|
/// <summary>
/// Character-to-entity table used by EncodeHtmlEntities: each key is a single
/// character (as a string) and each value is the complete named reference
/// that replaces it, including the leading ampersand and the closing ';'.
/// </summary>
private static readonly Dictionary<string, string> _entityReverseMap = new Dictionary<string, string>
{
    {"\"", "&quot;"}, {"'", "&apos;"}, {"&", "&amp;"}, {"<", "&lt;"},
    // The non-breaking space and soft hyphen keys are written as escapes so
    // they stay distinct; two empty-string keys would make the initializer throw.
    {">", "&gt;"}, {"\u00A0", "&nbsp;"}, {"¡", "&iexcl;"}, {"¢", "&cent;"},
    {"£", "&pound;"}, {"¤", "&curren;"}, {"¥", "&yen;"}, {"¦", "&brvbar;"},
    {"§", "&sect;"}, {"¨", "&uml;"}, {"©", "&copy;"}, {"ª", "&ordf;"},
    {"«", "&laquo;"}, {"¬", "&not;"}, {"\u00AD", "&shy;"}, {"®", "&reg;"},
    {"¯", "&macr;"}, {"°", "&deg;"}, {"±", "&plusmn;"}, {"²", "&sup2;"},
    {"³", "&sup3;"}, {"´", "&acute;"}, {"µ", "&micro;"}, {"¶", "&para;"},
    {"·", "&middot;"}, {"¸", "&cedil;"}, {"¹", "&sup1;"}, {"º", "&ordm;"},
    {"»", "&raquo;"}, {"¼", "&frac14;"}, {"½", "&frac12;"}, {"¾", "&frac34;"},
    {"¿", "&iquest;"}, {"×", "&times;"}, {"÷", "&divide;"}, {"À", "&Agrave;"},
    {"Á", "&Aacute;"}, {"Â", "&Acirc;"}, {"Ã", "&Atilde;"}, {"Ä", "&Auml;"},
    {"Å", "&Aring;"}, {"Æ", "&AElig;"}, {"Ç", "&Ccedil;"}, {"È", "&Egrave;"},
    {"É", "&Eacute;"}, {"Ê", "&Ecirc;"}, {"Ë", "&Euml;"}, {"Ì", "&Igrave;"},
    {"Í", "&Iacute;"}, {"Î", "&Icirc;"}, {"Ï", "&Iuml;"}, {"Ð", "&ETH;"},
    {"Ñ", "&Ntilde;"}, {"Ò", "&Ograve;"}, {"Ó", "&Oacute;"}, {"Ô", "&Ocirc;"},
    {"Õ", "&Otilde;"}, {"Ö", "&Ouml;"}, {"Ø", "&Oslash;"}, {"Ù", "&Ugrave;"},
    {"Ú", "&Uacute;"}, {"Û", "&Ucirc;"}, {"Ü", "&Uuml;"}, {"Ý", "&Yacute;"},
    {"Þ", "&THORN;"}, {"ß", "&szlig;"}, {"à", "&agrave;"}, {"á", "&aacute;"},
    {"â", "&acirc;"}, {"ã", "&atilde;"}, {"ä", "&auml;"}, {"å", "&aring;"},
    {"æ", "&aelig;"}, {"ç", "&ccedil;"}, {"è", "&egrave;"}, {"é", "&eacute;"},
    {"ê", "&ecirc;"}, {"ë", "&euml;"}, {"ì", "&igrave;"}, {"í", "&iacute;"},
    {"î", "&icirc;"}, {"ï", "&iuml;"}, {"ð", "&eth;"}, {"ñ", "&ntilde;"},
    {"ò", "&ograve;"}, {"ó", "&oacute;"}, {"ô", "&ocirc;"}, {"õ", "&otilde;"},
    {"ö", "&ouml;"}, {"ø", "&oslash;"}, {"ù", "&ugrave;"}, {"ú", "&uacute;"},
    {"û", "&ucirc;"}, {"ü", "&uuml;"}, {"ý", "&yacute;"}, {"þ", "&thorn;"},
    {"ÿ", "&yuml;"}
};
|
|
|
|
|
|
/// <summary>
/// Hexadecimal numeric character references recognised by DecodeHtmlEntities.
/// Each key is the lower-case hex digits followed by ';' - that is, the text
/// that comes after the "#x" prefix, which the decoder has already stepped
/// over before it consults this table. Each value is the decoded text.
/// </summary>
private static readonly Dictionary<string, string> _hexMap = new Dictionary<string, string>
{
    // Line feed and carriage return are keyed "a;" / "d;" (no leading 'x'):
    // the decoder skips the 'x' itself, so a key containing it can never match.
    {"9;","\t"}, {"a;","\n"}, {"d;","\r"}, {"20;"," "},
    {"21;","!"}, {"22;","\""}, {"23;","#"}, {"24;","$"},
    {"25;","%"}, {"26;","&"}, {"27;","'"}, {"28;","("},
    {"29;",")"}, {"2a;","*"}, {"2b;","+"}, {"2c;",","},

    {"2d;","-"}, {"2e;","."}, {"2f;","/"}, {"3a;",":"},
    {"3b;",";"}, {"3c;","<"}, {"3d;","="}, {"3e;",">"},
    {"3f;","?"}, {"40;","@"}, {"5b;","["}, {"5c;","\\"},
    {"5d;","]"}, {"5e;","^"}, {"60;","`"}, {"7b;","{"},

    {"a0;"," "}, {"a1;","¡"}, {"a2;","¢"}, {"a3;","£"},
    {"a4;","¤"}, {"a5;","¥"}, {"a6;","¦"}, {"a7;","§"},
    {"a9;","©"}, {"ab;","«"}, {"ae;","®"}, {"b0;","°"},
    {"b1;","±"}, {"b4;","´"}, {"b5;","µ"}, {"b6;","¶"},

    {"b7;","·"}, {"bb;","»"}, {"bc;","¼"}, {"bd;","½"},
    {"be;","¾"}, {"bf;","¿"}, {"f7;","÷"}, {"f8;","ø"}
};
|
|
|
|
|
|
/// <summary>
/// Decimal numeric character references recognised by DecodeHtmlEntities.
/// Each key is the decimal code followed by ';' (the text after the '#'
/// prefix); each value is the decoded text.
/// </summary>
private static readonly Dictionary<string, string> _numMap = new Dictionary<string, string>
{
    // Control characters and space
    { "9;", "\t" }, { "10;", "\n" }, { "13;", "\r" }, { "32;", " " },

    // ASCII punctuation and symbols
    { "33;", "!" }, { "34;", "\"" }, { "35;", "#" }, { "36;", "$" },
    { "37;", "%" }, { "38;", "&" }, { "39;", "'" }, { "40;", "(" },
    { "41;", ")" }, { "42;", "*" }, { "43;", "+" }, { "44;", "," },
    { "45;", "-" }, { "46;", "." }, { "47;", "/" }, { "58;", ":" },
    { "59;", ";" }, { "60;", "<" }, { "61;", "=" }, { "62;", ">" },
    { "63;", "?" }, { "64;", "@" }, { "91;", "[" }, { "92;", "\\" },
    { "93;", "]" }, { "94;", "^" }, { "96;", "`" }, { "123;", "{" },

    // Codes 160 and above
    { "160;", " " }, { "161;", "¡" }, { "162;", "¢" }, { "163;", "£" },
    { "164;", "¤" }, { "165;", "¥" }, { "166;", "¦" }, { "167;", "§" },
    { "169;", "©" }, { "171;", "«" }, { "174;", "®" }, { "176;", "°" },
    { "177;", "±" }, { "180;", "´" }, { "181;", "µ" }, { "182;", "¶" },
    { "183;", "·" }, { "187;", "»" }, { "188;", "¼" }, { "189;", "½" },
    { "190;", "¾" }, { "191;", "¿" }, { "247;", "÷" }, { "248;", "ø" }
};
|
|
|
#endregion
|
|
|
|
|
|
/// <summary>
/// Replaces the character references in <paramref name="line"/> that are
/// listed in the lookup tables of this class: named references
/// (_entityMap), hexadecimal references introduced by "#x" (_hexMap) and
/// decimal references introduced by "#" (_numMap).
/// </summary>
/// <param name="line">Text that may contain character references.</param>
/// <returns>
/// The decoded text. A null, empty or whitespace-only
/// <paramref name="line"/> is returned as is. An ampersand that does not
/// start a listed reference - including one at the very end of the text -
/// is copied through unchanged.
/// </returns>
public static string DecodeHtmlEntities(string line)
{
    if (string.IsNullOrWhiteSpace(line)) return line;
    var result = new StringBuilder();
    var cursor = new StringCursor(line);
    bool found;
    do
    {
        found = false;
        if (cursor.EOF) break;
        if (cursor.Current == '&')
        {
            found = TryDecodeReference(cursor, result);
        }
        if (false == found)
        {
            result.Append(cursor.Current);
        }
        // After a successful decode the cursor already stands on the first
        // character behind the reference, so it must not be advanced again.
    } while (found || cursor.MoveNext());
    return result.ToString();
}

/// <summary>
/// Tries to decode the reference that starts at the ampersand under the
/// cursor. On success the replacement text is appended to
/// <paramref name="result"/> and the cursor is moved past the reference;
/// on failure neither is touched.
/// </summary>
private static bool TryDecodeReference(StringCursor cursor, StringBuilder result)
{
    var afterAmp = cursor.Token.Next;
    if (afterAmp == null) return false; // the ampersand is the last character

    SymToken bodyStart;                 // first character compared against the table keys
    Dictionary<string, string> map;
    int prefixLength;                   // characters in front of the body: "&", "&#" or "&#x"

    if (afterAmp.Value == '#')
    {
        var afterHash = afterAmp.Next;
        if (afterHash == null) return false; // the text ends with "&#"

        if (afterHash.Value == 'x') // HEX
        {
            bodyStart = afterHash.Next;
            map = _hexMap;
            prefixLength = 3;
        }
        else // DEC
        {
            bodyStart = afterHash;
            map = _numMap;
            prefixLength = 2;
        }
    }
    else // Entity
    {
        bodyStart = afterAmp;
        map = _entityMap;
        prefixLength = 1;
    }

    if (bodyStart == null) return false; // the text ends with "&#x"

    foreach (var pair in map)
    {
        if (bodyStart.Test(pair.Key))
        {
            for (int i = 0; i < prefixLength; i++)
            {
                cursor.MoveNext();
            }
            cursor.MoveAfter(pair.Key);
            result.Append(pair.Value);
            return true;
        }
    }
    return false;
}
|
|
|
|
|
|
/// <summary>
/// Walks <paramref name="line"/> character by character and replaces every
/// character that has an entry in _entityReverseMap with that entry.
/// An ampersand gets special handling: the identity read after it is looked
/// up (with ';' appended) in _entityMap, and only when it is not a known
/// entity name is the ampersand itself replaced by its reverse-map entry.
/// </summary>
/// <param name="line">Text to encode.</param>
/// <returns>
/// The encoded text; a null, empty or whitespace-only
/// <paramref name="line"/> is returned as is.
/// </returns>
public static string EncodeHtmlEntities(string line)
{
    if (string.IsNullOrWhiteSpace(line))
    {
        return line;
    }

    var encoded = new StringBuilder();
    var reader = new TStringReader(line);
    while (false == reader.EOF)
    {
        var symbol = reader.Current;
        if (symbol == '&')
        {
            // Peek at the name behind the ampersand, then step back.
            // NOTE(review): presumably Move/ReadIdentity/MoveBack leave the
            // reader on the ampersand again - confirm against TStringReader.
            reader.Move();
            var name = reader.ReadIdentity();
            reader.MoveBack();

            if (_entityMap.ContainsKey(name + ";"))
            {
                // Already a known entity: keep the character as it is.
                encoded.Append(reader.Current);
            }
            else
            {
                encoded.Append(_entityReverseMap["&"]);
            }
        }
        else
        {
            string replacement;
            if (_entityReverseMap.TryGetValue(symbol.ToString(), out replacement))
            {
                encoded.Append(replacement);
            }
            else
            {
                encoded.Append(symbol);
            }
        }
        reader.Move();
    }
    return encoded.ToString();
}
|
|
|
}
|
|
|
}
|