using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.Serialization;
using System.Text;

namespace LemmaSharp
{
    /// <summary>
    /// Collection of lemmatization examples (word, lemma, weight, optional MSD tag),
    /// deduplicated by example signature, together with the RuleList derived from them.
    /// Additions are accumulated in a dictionary; a sorted list snapshot is built
    /// lazily by FinalizeAdditions().
    /// </summary>
    [Serializable]
    public class ExampleList : ISerializable
    {
        #region Private Variables
        private LemmatizerSettings lsett;
        private RuleList rlRules;
        // Examples keyed by LemmaExample.Signature; duplicates are merged via Join().
        private Dictionary<string, LemmaExample> dictExamples;
        // Sorted snapshot of dictExamples.Values; null until FinalizeAdditions() runs,
        // and invalidated (reset to null) by every Add().
        private List<LemmaExample> lstExamples;
        #endregion

        #region Constructor(s)
        public ExampleList(LemmatizerSettings lsett) : base()
        {
            this.lsett = lsett;
            this.dictExamples = new Dictionary<string, LemmaExample>();
            this.lstExamples = null;
            this.rlRules = new RuleList(lsett);
        }
        public ExampleList(StreamReader srIn, string sFormat, LemmatizerSettings lsett) : this(lsett)
        {
            AddMultextFile(srIn, sFormat);
        }
        #endregion

        #region Public Properties & Indexers
        public LemmaExample this[int i]
        {
            get
            {
                if (lstExamples == null)
                    FinalizeAdditions();
                return lstExamples[i];
            }
        }
        public int Count
        {
            get
            {
                if (lstExamples == null)
                    FinalizeAdditions();
                return lstExamples.Count;
            }
        }
        public double WeightSum
        {
            get
            {
                if (lstExamples == null)
                    FinalizeAdditions();

                double dWeight = 0;
                foreach (LemmaExample exm in lstExamples)
                    dWeight += exm.Weight;
                return dWeight;
            }
        }
        public RuleList Rules
        {
            get
            {
                return rlRules;
            }
        }
        public List<LemmaExample> ListExamples
        {
            get
            {
                if (lstExamples == null)
                    FinalizeAdditions();
                return lstExamples;
            }
        }
        #endregion

        #region Essential Class Functions (adding/removing examples)
        /// <summary>
        /// Reads tab-separated examples from a Multext-style file. sFormat gives the
        /// column positions via the characters W (word), L (lemma), M (msd) and
        /// F (frequency/weight); W and L are mandatory. A lemma of "=" means the lemma
        /// equals the word. Stops after 50 malformed lines.
        /// </summary>
        public void AddMultextFile(StreamReader srIn, string sFormat)
        {
            //read from file
            string sLine = null;
            int iError = 0;
            int iLine = 0;
            var iW = sFormat.IndexOf('W');
            var iL = sFormat.IndexOf('L');
            var iM = sFormat.IndexOf('M');
            var iF = sFormat.IndexOf('F');
            var iLen = Math.Max(Math.Max(iW, iL), Math.Max(iM, iF)) + 1;

            if (iW < 0 || iL < 0)
            {
                throw new Exception("Can not find word and lemma location in the format specification");
            }
            while ((sLine = srIn.ReadLine()) != null && iError < 50)
            {
                iLine++;
                string[] asWords = sLine.Split(new char[] { '\t' });
                if (asWords.Length < iLen)
                {
                    iError++;
                    continue;
                }
                var sWord = asWords[iW];
                var sLemma = asWords[iL];
                if (sLemma.Equals("=", StringComparison.Ordinal))
                    sLemma = sWord;
                string sMsd = null;
                if (iM > -1)
                    sMsd = asWords[iM];
                double dWeight = 1;
                // BUGFIX: the weight lives in column iF, not iM (the msd column);
                // also keep the default weight of 1 when parsing fails, because
                // TryParse sets the out parameter to 0 on failure.
                if (iF > -1 && !Double.TryParse(asWords[iF], out dWeight))
                    dWeight = 1;
                AddExample(sWord, sLemma, dWeight, sMsd);
            }
            if (iError == 50)
                throw new Exception("Parsing stopped because of too many (50) errors. Check format specification");
        }

        /// <summary>
        /// Creates a new example (dropping the MSD if settings say to ignore it) and
        /// merges it into the collection. Returns the stored (possibly pre-existing,
        /// joined) example.
        /// </summary>
        public LemmaExample AddExample(string sWord, string sLemma, double dWeight, string sMsd)
        {
            string sNewMsd = lsett.eMsdConsider != LemmatizerSettings.MsdConsideration.Ignore
                ? sMsd
                : null;
            var leNew = new LemmaExample(sWord, sLemma, dWeight, sNewMsd, rlRules, lsett);
            return Add(leNew);
        }

        // Inserts leNew, or joins it into an existing example with the same signature.
        // Invalidates the sorted snapshot.
        private LemmaExample Add(LemmaExample leNew)
        {
            LemmaExample leReturn = null;
            if (!dictExamples.TryGetValue(leNew.Signature, out leReturn))
            {
                leReturn = leNew;
                dictExamples.Add(leReturn.Signature, leReturn);
            }
            else
                leReturn.Join(leNew);
            lstExamples = null;
            return leReturn;
        }
        public void DropExamples()
        {
            dictExamples.Clear();
            lstExamples = null;
        }
        /// <summary>
        /// Builds the sorted list snapshot of all examples; no-op if already built.
        /// </summary>
        public void FinalizeAdditions()
        {
            if (lstExamples != null)
                return;
            lstExamples = new List<LemmaExample>(dictExamples.Values);
            lstExamples.Sort();
        }
        /// <summary>
        /// Derives a new ExampleList using either the front (reversed) or rear
        /// word/lemma projections of each example (used by the two-sided lemmatizer).
        /// </summary>
        public ExampleList GetFrontRearExampleList(bool front)
        {
            var elExamplesNew = new ExampleList(lsett);
            foreach (var le in this.ListExamples)
            {
                if (front)
                    elExamplesNew.AddExample(le.WordFront, le.LemmaFront, le.Weight, le.Msd);
                else
                    elExamplesNew.AddExample(le.WordRear, le.LemmaRear, le.Weight, le.Msd);
            }
            elExamplesNew.FinalizeAdditions();
            return elExamplesNew;
        }
        #endregion

        #region Output Functions (ToString)
        public override string ToString()
        {
            var sb = new StringBuilder();
            // BUGFIX: iterate ListExamples (which lazily finalizes) instead of the raw
            // lstExamples field, which is null before FinalizeAdditions() and after any Add().
            foreach (var exm in ListExamples)
            {
                sb.AppendLine(exm.ToString());
            }
            return sb.ToString();
        }
        #endregion

        #region Serialization Functions (.Net Default - ISerializable)
        public void GetObjectData(SerializationInfo info, StreamingContext context)
        {
            info.AddValue("lsett", lsett);
            info.AddValue("iNumExamples", dictExamples.Count);
            var aWords = new string[dictExamples.Count];
            var aLemmas = new string[dictExamples.Count];
            var aWeights = new double[dictExamples.Count];
            var aMsds = new string[dictExamples.Count];
            int iExm = 0;
            foreach (var exm in dictExamples.Values)
            {
                aWords[iExm] = exm.Word;
                aLemmas[iExm] = exm.Lemma;
                aWeights[iExm] = exm.Weight;
                aMsds[iExm] = exm.Msd;
                iExm++;
            }
            info.AddValue("aWords", aWords);
            info.AddValue("aLemmas", aLemmas);
            info.AddValue("aWeights", aWeights);
            info.AddValue("aMsds", aMsds);
        }
        public ExampleList(SerializationInfo info, StreamingContext context)
        {
            lsett = (LemmatizerSettings)info.GetValue("lsett", typeof(LemmatizerSettings));
            this.dictExamples = new Dictionary<string, LemmaExample>();
            this.lstExamples = null;
            this.rlRules = new RuleList(lsett);
            var aWords = (string[])info.GetValue("aWords", typeof(string[]));
            var aLemmas = (string[])info.GetValue("aLemmas", typeof(string[]));
            var aWeights = (double[])info.GetValue("aWeights", typeof(double[]));
            var aMsds = (string[])info.GetValue("aMsds", typeof(string[]));
            for (int iExm = 0; iExm < aWords.Length; iExm++)
                AddExample(aWords[iExm], aLemmas[iExm], aWeights[iExm], aMsds[iExm]);
        }
        #endregion

        #region Serialization Functions (Binary)
        public void Serialize(BinaryWriter binWrt, bool bSerializeExamples, bool bThisTopObject)
        {
            //save metadata
            binWrt.Write(bThisTopObject);

            //save reference types if needed -------------------------
            if (bThisTopObject)
                lsett.Serialize(binWrt);

            rlRules.Serialize(binWrt, false);

            if (!bSerializeExamples)
            {
                binWrt.Write(false); // lstExamples == null
                binWrt.Write(0);     // dictExamples.Count == 0
            }
            else
            {
                if (lstExamples == null)
                {
                    binWrt.Write(false); // lstExamples == null
                    //save dictionary items
                    int iCount = dictExamples.Count;
                    binWrt.Write(iCount);
                    foreach (var kvp in dictExamples)
                    {
                        binWrt.Write(kvp.Value.Rule.Signature);
                        kvp.Value.Serialize(binWrt, false);
                    }
                }
                else
                {
                    binWrt.Write(true); // lstExamples != null
                    //save list & dictionary items
                    var iCount = lstExamples.Count;
                    binWrt.Write(iCount);
                    foreach (var le in lstExamples)
                    {
                        binWrt.Write(le.Rule.Signature);
                        le.Serialize(binWrt, false);
                    }
                }
            }
        }
        public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett)
        {
            //load metadata
            var bThisTopObject = binRead.ReadBoolean();
            //load reference types if needed -------------------------
            if (bThisTopObject)
                this.lsett = new LemmatizerSettings(binRead);
            else
                this.lsett = lsett;

            rlRules = new RuleList(binRead, this.lsett);
            var bCreateLstExamples = binRead.ReadBoolean();
            lstExamples = bCreateLstExamples ? new List<LemmaExample>() : null;
            dictExamples = new Dictionary<string, LemmaExample>();

            //load dictionary items
            var iCount = binRead.ReadInt32();
            for (var iId = 0; iId < iCount; iId++)
            {
                var lrRule = rlRules[binRead.ReadString()];
                var le = new LemmaExample(binRead, this.lsett, lrRule);
                dictExamples.Add(le.Signature, le);
                if (bCreateLstExamples)
                    lstExamples.Add(le);
            }
        }
        public ExampleList(BinaryReader binRead, LemmatizerSettings lsett)
        {
            Deserialize(binRead, lsett);
        }
        #endregion

        #region Serialization Functions (Latino)
#if LATINO
        public void Save(Latino.BinarySerializer binWrt, bool bSerializeExamples, bool bThisTopObject) {
            //save metadata
            binWrt.WriteBool(bThisTopObject);

            //save reference types if needed -------------------------
            if (bThisTopObject)
                lsett.Save(binWrt);

            rlRules.Save(binWrt, false);

            if (!bSerializeExamples) {
                binWrt.WriteBool(false); // lstExamples == null
                binWrt.WriteInt(0);      // dictExamples.Count == 0
            }
            else {
                if (lstExamples == null) {
                    binWrt.WriteBool(false); // lstExamples == null

                    //save dictionary items
                    int iCount = dictExamples.Count;
                    binWrt.WriteInt(iCount);

                    foreach (KeyValuePair<string, LemmaExample> kvp in dictExamples) {
                        binWrt.WriteString(kvp.Value.Rule.Signature);
                        kvp.Value.Save(binWrt, false);
                    }
                }
                else {
                    binWrt.WriteBool(true); // lstExamples != null

                    //save list & dictionary items
                    int iCount = lstExamples.Count;
                    binWrt.WriteInt(iCount);

                    foreach (LemmaExample le in lstExamples) {
                        binWrt.WriteString(le.Rule.Signature);
                        le.Save(binWrt, false);
                    }
                }
            }
        }
        public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
            //load metadata
            bool bThisTopObject = binRead.ReadBool();

            //load reference types if needed -------------------------
            if (bThisTopObject)
                this.lsett = new LemmatizerSettings(binRead);
            else
                this.lsett = lsett;

            rlRules = new RuleList(binRead, this.lsett);

            bool bCreateLstExamples = binRead.ReadBool();

            lstExamples = bCreateLstExamples ? new List<LemmaExample>() : null;
            dictExamples = new Dictionary<string, LemmaExample>();

            //load dictionary items
            int iCount = binRead.ReadInt();
            for (int iId = 0; iId < iCount; iId++) {
                LemmaRule lrRule = rlRules[binRead.ReadString()];
                LemmaExample le = new LemmaExample(binRead, this.lsett, lrRule);

                dictExamples.Add(le.Signature, le);
                if (bCreateLstExamples) lstExamples.Add(le);
            }
        }
        public ExampleList(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
            Load(binRead, lsett);
        }
#endif
        #endregion
    }
}
namespace LemmaSharp
{
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Text;

    /// <summary>
    /// A single lemmatization training example: a word, its lemma, an accumulated
    /// weight, an optional MSD tag, and the transformation rule derived from the
    /// word/lemma pair. Comparable so ExampleList can sort examples.
    /// </summary>
    public class LemmaExample : IComparable<LemmaExample>, IComparer<LemmaExample>
    {
        #region Private Variables
        private string sWord;
        private string sLemma;
        private string sSignature;   // identity key; may or may not include the MSD, per settings
        private string sMsd;
        private double dWeight;
        private LemmaRule lrRule;
        private LemmatizerSettings lsett;

        // Lazily computed projections used by the front/rear two-sided lemmatizer.
        private string sWordRearCache;
        private string sWordFrontCache;
        private string sLemmaFrontCache;
        #endregion

        #region Constructor(s)
        public LemmaExample(string sWord, string sLemma, double dWeight, string sMsd, RuleList rlRules, LemmatizerSettings lsett)
        {
            this.lsett = lsett;
            this.sWord = sWord;
            this.sLemma = sLemma;
            this.sMsd = sMsd;
            this.dWeight = dWeight;
            this.lrRule = rlRules.AddRule(this);
            // The MSD is part of the signature only in Distinct mode; all join modes
            // key examples by word+lemma alone so same-pair examples merge.
            switch (lsett.eMsdConsider)
            {
                case LemmatizerSettings.MsdConsideration.Ignore:
                case LemmatizerSettings.MsdConsideration.JoinAll:
                case LemmatizerSettings.MsdConsideration.JoinDistinct:
                case LemmatizerSettings.MsdConsideration.JoinSameSubstring:
                    sSignature = string.Format("[{0}]==>[{1}]", sWord, sLemma);
                    break;
                case LemmatizerSettings.MsdConsideration.Distinct:
                default:
                    sSignature = string.Format("[{0}]==>[{1}]({2})", sWord, sLemma, sMsd ?? "");
                    break;
            }
            this.sWordRearCache = null;
            this.sWordFrontCache = null;
            this.sLemmaFrontCache = null;
        }
        #endregion

        #region Public Properties
        public string Word
        {
            get
            {
                return sWord;
            }
        }
        public string Lemma
        {
            get
            {
                return sLemma;
            }
        }
        public string Msd
        {
            get
            {
                return sMsd;
            }
        }
        public string Signature
        {
            get
            {
                return sSignature;
            }
        }
        public double Weight
        {
            get
            {
                return dWeight;
            }
        }
        public LemmaRule Rule
        {
            get
            {
                return lrRule;
            }
        }
        /// <summary>
        /// Word to be pre-lemmatized with Front-Lemmatizer into LemmaFront which is then
        /// lemmatized by standard Rear-Lemmatizer (Warning it is reversed).
        /// </summary>
        public string WordFront
        {
            get
            {
                if (sWordFrontCache == null)
                    sWordFrontCache = StringReverse(sWord);
                return sWordFrontCache;
            }
        }
        /// <summary>
        /// Lemma to be produced by pre-lemmatizing with Front-Lemmatizer (Warning it is reversed).
        /// </summary>
        public string LemmaFront
        {
            get
            {
                if (sLemmaFrontCache == null)
                    sLemmaFrontCache = StringReverse(WordRear);
                return sLemmaFrontCache;
            }
        }
        /// <summary>
        /// Word to be lemmatized by standard Rear-Lemmatizer (its beginning has already
        /// been modified by the Front-Lemmatizer): the lemma's prefix up to and including
        /// the longest common substring, followed by the word's suffix after it.
        /// </summary>
        public string WordRear
        {
            get
            {
                if (sWordRearCache == null)
                {
                    int lemmaPos = 0, wordPos = 0;
                    var common = LongestCommonSubstring(sWord, sLemma, ref wordPos, ref lemmaPos);
                    sWordRearCache = lemmaPos == -1
                        ? sLemma
                        : (sLemma.Substring(0, lemmaPos + common.Length) + sWord.Substring(wordPos + common.Length));
                }
                return sWordRearCache;
            }
        }
        /// <summary>
        /// Lemma to be produced by standard Rear-Lemmatizer from WordRear.
        /// </summary>
        public string LemmaRear
        {
            get
            {
                return sLemma;
            }
        }
        #endregion

        #region Essential Class Functions (joining two examples into one)
        //TODO - this function is not totaly ok because sMsd should not be
        //changed since it could be included in signature
        public void Join(LemmaExample leJoin)
        {
            dWeight += leJoin.dWeight;
            if (sMsd != null)
                switch (lsett.eMsdConsider)
                {
                    case LemmatizerSettings.MsdConsideration.Ignore:
                        sMsd = null;
                        break;
                    case LemmatizerSettings.MsdConsideration.Distinct:
                        break;
                    case LemmatizerSettings.MsdConsideration.JoinAll:
                        sMsd += "|" + leJoin.sMsd;
                        break;
                    case LemmatizerSettings.MsdConsideration.JoinDistinct:
                        // Append "|msd" only if that msd is not already present.
                        var append = string.Format("|{0}", leJoin.sMsd);
                        if (false == sMsd.Equals(leJoin.sMsd, StringComparison.Ordinal) &&
                            sMsd.IndexOf(append) < 0)
                        {
                            sMsd += append;
                        }
                        break;
                    case LemmatizerSettings.MsdConsideration.JoinSameSubstring:
                        // Keep only the common prefix of the two MSDs.
                        int iPos = 0;
                        var iMax = Math.Min(sMsd.Length, leJoin.sMsd.Length);
                        while (iPos < iMax && sMsd[iPos] == leJoin.sMsd[iPos])
                            iPos++;
                        sMsd = sMsd.Substring(0, iPos);
                        break;
                    default:
                        break;
                }
        }
        #endregion

        #region Essential Class Functions (calculating similarities betwen examples)
        public int Similarity(LemmaExample le)
        {
            return Similarity(this, le);
        }
        /// <summary>
        /// Length of the common suffix of the two example words.
        /// </summary>
        public static int Similarity(LemmaExample le1, LemmaExample le2)
        {
            var sWord1 = le1.sWord;
            var sWord2 = le2.sWord;
            var iLen1 = sWord1.Length;
            var iLen2 = sWord2.Length;
            var iMaxLen = Math.Min(iLen1, iLen2);
            for (var iPos = 1; iPos <= iMaxLen; iPos++)
            {
                if (sWord1[iLen1 - iPos] != sWord2[iLen2 - iPos])
                    return iPos - 1;
            }
            //TODO similarity should be bigger if two words are totaly equal
            //if (sWord1 == sWord2)
            //    return iMaxLen + 1;
            //else
            return iMaxLen;
        }
        #endregion

        #region Essential Class Functions (comparing examples - eg.: for sorting)
        /// <summary>
        /// Compares this example against another, by word (from the rear), then lemma
        /// (from the front), then — in Distinct MSD mode — by MSD. Used for sorting.
        /// </summary>
        /// <param name="other">Example to compare against.</param>
        /// <returns>1 if this example is bigger, -1 if smaller and 0 if both are the same.</returns>
        public int CompareTo(LemmaExample other)
        {
            var iComparison = CompareStrings(this.sWord, other.sWord, false);
            if (iComparison != 0)
                return iComparison;

            iComparison = CompareStrings(this.sLemma, other.sLemma, true);
            if (iComparison != 0)
                return iComparison;

            if (lsett.eMsdConsider == LemmatizerSettings.MsdConsideration.Distinct &&
                this.sMsd != null && other.sMsd != null)
            {
                iComparison = CompareStrings(this.sMsd, other.sMsd, true);
                if (iComparison != 0)
                    return iComparison;
            }
            return 0;
        }

        public int Compare(LemmaExample x, LemmaExample y)
        {
            return x.CompareTo(y);
        }

        /// <summary>
        /// Ordinal char-by-char comparison, forward or from the rear ends of the strings;
        /// ties broken by length.
        /// </summary>
        public static int CompareStrings(string sStr1, string sStr2, bool bForward)
        {
            var iLen1 = sStr1.Length;
            var iLen2 = sStr2.Length;
            var iMaxLen = Math.Min(iLen1, iLen2);
            if (bForward)
            {
                for (int iPos = 0; iPos < iMaxLen; iPos++)
                {
                    if (sStr1[iPos] > sStr2[iPos])
                        return 1;
                    if (sStr1[iPos] < sStr2[iPos])
                        return -1;
                }
            }
            else
            {
                for (int iPos = 1; iPos <= iMaxLen; iPos++)
                {
                    if (sStr1[iLen1 - iPos] > sStr2[iLen2 - iPos])
                        return 1;
                    if (sStr1[iLen1 - iPos] < sStr2[iLen2 - iPos])
                        return -1;
                }
            }
            if (iLen1 > iLen2)
                return 1;
            if (iLen1 < iLen2)
                return -1;
            return 0;
        }

        // NOTE: name kept as-is ("Prifix") because it is part of the public interface.
        /// <summary>
        /// Length of the common prefix of the two strings.
        /// </summary>
        public static int EqualPrifixLen(string sStr1, string sStr2)
        {
            var iLen1 = sStr1.Length;
            var iLen2 = sStr2.Length;
            var iMaxLen = Math.Min(iLen1, iLen2);

            for (var iPos = 0; iPos < iMaxLen; iPos++)
            {
                if (sStr1[iPos] != sStr2[iPos])
                    return iPos;
            }
            return iMaxLen;
        }

        /// <summary>
        /// Classic dynamic-programming longest common substring. Returns the substring
        /// and its start positions in both strings via the ref parameters (-1 if none).
        /// </summary>
        public static string LongestCommonSubstring(string sStr1, string sStr2, ref int iPosInStr1, ref int iPosInStr2)
        {
            var l = new int[sStr1.Length + 1, sStr2.Length + 1];
            int z = 0;
            string ret = "";
            iPosInStr1 = -1;
            iPosInStr2 = -1;
            for (var i = 0; i < sStr1.Length; i++)
            {
                for (var j = 0; j < sStr2.Length; j++)
                {
                    if (sStr1[i] == sStr2[j])
                    {
                        if (i == 0 || j == 0)
                        {
                            l[i, j] = 1;
                        }
                        else
                        {
                            l[i, j] = l[i - 1, j - 1] + 1;
                        }
                        if (l[i, j] > z)
                        {
                            z = l[i, j];
                            iPosInStr1 = i - z + 1;
                            iPosInStr2 = j - z + 1;
                            ret = sStr1.Substring(i - z + 1, z);
                        }
                    }
                }
            }
            return ret;
        }

        /// <summary>
        /// Returns the reverse of s (null for null).
        /// </summary>
        public static string StringReverse(string s)
        {
            if (s == null)
                return null;
            // BUGFIX: the previous implementation allocated an empty char array,
            // iterated only up to s.Length >> 1 while mutating that bound, and used it
            // as an index — leaving most characters as '\0' for any string of length >= 2.
            var charArray = s.ToCharArray();
            Array.Reverse(charArray);
            return new string(charArray);
        }
        #endregion

        #region Output Functions (ToString)
        public override string ToString()
        {
            var sb = new StringBuilder();
            if (sWord != null)
                sb.AppendFormat("W:\"{0}\" ", sWord);
            if (sLemma != null)
                sb.AppendFormat("L:\"{0}\" ", sLemma);
            if (sMsd != null)
                sb.AppendFormat("M:\"{0}\" ", sMsd);
            if (false == Double.IsNaN(dWeight))
                sb.AppendFormat("F:\"{0}\" ", dWeight);
            if (lrRule != null)
                sb.AppendFormat("R:{0} ", lrRule);
            if (sb.Length > 0)
                return sb.ToString(0, sb.Length - 1);
            return string.Empty;
        }
        #endregion

        #region Serialization Functions (Binary)
        public void Serialize(BinaryWriter binWrt, bool bThisTopObject)
        {
            //save metadata
            binWrt.Write(bThisTopObject);

            //save value types --------------------------------------
            binWrt.Write(sWord);
            binWrt.Write(sLemma);
            binWrt.Write(sSignature);
            if (sMsd == null)
            {
                binWrt.Write(false);
            }
            else
            {
                binWrt.Write(true);
                binWrt.Write(sMsd);
            }
            binWrt.Write(dWeight);
            //save reference types if needed -------------------------
            if (bThisTopObject)
            {
                lsett.Serialize(binWrt);
                lrRule.Serialize(binWrt, false);
            }
        }
        public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, LemmaRule lrRule)
        {
            //load metadata
            var bThisTopObject = binRead.ReadBoolean();

            //load value types --------------------------------------
            sWord = binRead.ReadString();
            sLemma = binRead.ReadString();
            sSignature = binRead.ReadString();
            if (binRead.ReadBoolean())
                sMsd = binRead.ReadString();
            else
                sMsd = null;
            dWeight = binRead.ReadDouble();

            //load reference types if needed -------------------------
            if (bThisTopObject)
            {
                this.lsett = new LemmatizerSettings(binRead);
                this.lrRule = new LemmaRule(binRead, this.lsett);
            }
            else
            {
                this.lsett = lsett;
                this.lrRule = lrRule;
            }
            this.sWordRearCache = null;
            this.sWordFrontCache = null;
            this.sLemmaFrontCache = null;
        }

        public LemmaExample(BinaryReader binRead, LemmatizerSettings lsett, LemmaRule lrRule)
        {
            Deserialize(binRead, lsett, lrRule);
        }
        #endregion

        #region Serialization Functions (Latino)
#if LATINO
        public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject) {
            //save metadata
            binWrt.WriteBool(bThisTopObject);

            //save value types --------------------------------------
            binWrt.WriteString(sWord);
            binWrt.WriteString(sLemma);
            binWrt.WriteString(sSignature);
            if (sMsd == null)
                binWrt.WriteBool(false);
            else {
                binWrt.WriteBool(true);
                binWrt.WriteString(sMsd);
            }
            binWrt.WriteDouble(dWeight);

            //save reference types if needed -------------------------
            if (bThisTopObject) {
                lsett.Save(binWrt);
                lrRule.Save(binWrt, false);
            }
        }
        public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett, LemmaRule lrRule) {
            //load metadata
            bool bThisTopObject = binRead.ReadBool();

            //load value types --------------------------------------
            sWord = binRead.ReadString();
            sLemma = binRead.ReadString();
            sSignature = binRead.ReadString();
            if (binRead.ReadBool())
                sMsd = binRead.ReadString();
            else
                sMsd = null;
            dWeight = binRead.ReadDouble();

            //load reference types if needed -------------------------
            if (bThisTopObject) {
                this.lsett = new LemmatizerSettings(binRead);
                this.lrRule = new LemmaRule(binRead, this.lsett);
            }
            else {
                this.lsett = lsett;
                this.lrRule = lrRule;
            }
        }
        public LemmaExample(Latino.BinarySerializer binRead, LemmatizerSettings lsett, LemmaRule lrRule) {
            Load(binRead, lsett, lrRule);
        }
#endif
        #endregion
    }
}
namespace LemmaSharp
{
    using System;
    using System.IO;

    /// <summary>
    /// A suffix-transformation rule: strip the last iFrom characters of a word and
    /// append sTo. Identified by an id and a textual signature.
    /// </summary>
    public class LemmaRule
    {
        #region Private Variables
        private int iId;
        private int iFrom;          // number of trailing characters removed from the word
        private string sFrom;       // the removed suffix itself (null unless bUseFromInRules)
        private string sTo;         // the suffix appended in its place
        private string sSignature;  // textual identity of the rule
        private LemmatizerSettings lsett;
        #endregion

        #region Constructor(s)
        public LemmaRule(string sWord, string sLemma, int iId, LemmatizerSettings lsett)
        {
            this.lsett = lsett;
            this.iId = iId;

            // Split both strings at the end of their common prefix (the shared stem).
            int iSameStem = SameStem(sWord, sLemma);
            sTo = sLemma.Substring(iSameStem);
            iFrom = sWord.Length - iSameStem;

            if (lsett.bUseFromInRules)
            {
                sFrom = sWord.Substring(iSameStem);
                sSignature = string.Format("[{0}]==>[{1}]", sFrom, sTo);
            }
            else
            {
                sFrom = null;
                sSignature = string.Format("[#{0}]==>[{1}]", iFrom, sTo);
            }
        }
        #endregion

        #region Public Properties
        public string Signature
        {
            get { return sSignature; }
        }
        public int Id
        {
            get { return iId; }
        }
        #endregion

        #region Essential Class Functions
        // Length of the common prefix of the two strings.
        private static int SameStem(string sStr1, string sStr2)
        {
            var iMaxLen = Math.Min(sStr1.Length, sStr2.Length);
            var iPos = 0;
            while (iPos < iMaxLen && sStr1[iPos] == sStr2[iPos])
                iPos++;
            return iPos;
        }
        /// <summary>
        /// True if this rule can fire on a word group whose matched condition suffix
        /// is at least as long as the suffix this rule removes.
        /// </summary>
        public bool IsApplicableToGroup(int iGroupCondLen)
        {
            return iGroupCondLen >= iFrom;
        }
        /// <summary>
        /// Applies the rule: removes the last iFrom characters and appends sTo.
        /// </summary>
        public string Lemmatize(string sWord)
        {
            return sWord.Substring(0, sWord.Length - iFrom) + sTo;
        }
        #endregion

        #region Output Functions (ToString)
        public override string ToString()
        {
            return string.Format("{0}:{1}", iId, sSignature);
        }
        #endregion

        #region Serialization Functions (Binary)
        public void Serialize(BinaryWriter binWrt, bool bThisTopObject)
        {
            //save metadata
            binWrt.Write(bThisTopObject);

            //save value types --------------------------------------
            binWrt.Write(iId);
            binWrt.Write(iFrom);
            var bHasFrom = sFrom != null;
            binWrt.Write(bHasFrom);
            if (bHasFrom)
                binWrt.Write(sFrom);
            binWrt.Write(sTo);
            binWrt.Write(sSignature);

            if (bThisTopObject)
                lsett.Serialize(binWrt);
        }
        public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett)
        {
            //load metadata
            var bThisTopObject = binRead.ReadBoolean();

            //load value types --------------------------------------
            iId = binRead.ReadInt32();
            iFrom = binRead.ReadInt32();
            sFrom = binRead.ReadBoolean() ? binRead.ReadString() : null;
            sTo = binRead.ReadString();
            sSignature = binRead.ReadString();

            //load reference types if needed -------------------------
            this.lsett = bThisTopObject ? new LemmatizerSettings(binRead) : lsett;
        }

        public LemmaRule(System.IO.BinaryReader binRead, LemmatizerSettings lsett)
        {
            this.Deserialize(binRead, lsett);
        }
        #endregion

        #region Serialization Functions (Latino)
#if LATINO
        public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject) {
            //save metadata
            binWrt.WriteBool(bThisTopObject);

            //save value types --------------------------------------
            binWrt.WriteInt(iId);
            binWrt.WriteInt(iFrom);
            if (sFrom == null)
                binWrt.WriteBool(false);
            else {
                binWrt.WriteBool(true);
                binWrt.WriteString(sFrom);
            }
            binWrt.WriteString(sTo);
            binWrt.WriteString(sSignature);

            if (bThisTopObject)
                lsett.Save(binWrt);
        }
        public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
            //load metadata
            bool bThisTopObject = binRead.ReadBool();

            //load value types --------------------------------------
            iId = binRead.ReadInt();
            iFrom = binRead.ReadInt();
            if (binRead.ReadBool())
                sFrom = binRead.ReadString();
            else
                sFrom = null;
            sTo = binRead.ReadString();
            sSignature = binRead.ReadString();

            //load reference types if needed -------------------------
            if (bThisTopObject)
                this.lsett = new LemmatizerSettings(binRead);
            else
                this.lsett = lsett;
        }
        public LemmaRule(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
            Load(binRead, lsett);
        }
#endif
        #endregion
    }
}
aBestRules; //list of best rules + private double dWeight; + //source of this node + private int iStart; + private int iEnd; + private ExampleList elExamples; + #endregion + + #region Constructor(s) & Destructor(s) + private LemmaTreeNode(LemmatizerSettings lsett) + { + this.lsett = lsett; + } + public LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples) + : this(lsett, elExamples, 0, elExamples.Count - 1, null) + { + } + /// + /// + /// + /// + /// + /// Index of the first word of the current group + /// Index of the last word of the current group + /// + private LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples, int iStart, int iEnd, LemmaTreeNode ltnParentNode) : this(lsett) + { + this.ltnParentNode = ltnParentNode; + this.dictSubNodes = null; + this.iStart = iStart; + this.iEnd = iEnd; + this.elExamples = elExamples; + if (iStart >= elExamples.Count || iEnd >= elExamples.Count || iStart > iEnd) + { + lrBestRule = elExamples.Rules.DefaultRule; + aBestRules = new RuleWeighted[1]; + aBestRules[0] = new RuleWeighted(lrBestRule, 0); + dWeight = 0; + return; + } + int iConditionLength = Math.Min(ltnParentNode == null ? 0 : ltnParentNode.iSimilarity + 1, elExamples[iStart].Word.Length); + this.sCondition = elExamples[iStart].Word.Substring(elExamples[iStart].Word.Length - iConditionLength); + this.iSimilarity = elExamples[iStart].Similarity(elExamples[iEnd]); + this.bWholeWord = ltnParentNode == null ? 
false : elExamples[iEnd].Word.Length == ltnParentNode.iSimilarity; + FindBestRules(); + AddSubAll(); + //TODO check this heuristics, can be problematic when there are more applicable rules + if (dictSubNodes != null) + { + var lReplaceNodes = new List>(); + foreach (var kvpChild in dictSubNodes) + if (kvpChild.Value.dictSubNodes != null && kvpChild.Value.dictSubNodes.Count == 1) + { + var enumChildChild = kvpChild.Value.dictSubNodes.Values.GetEnumerator(); + enumChildChild.MoveNext(); + var ltrChildChild = enumChildChild.Current; + if (kvpChild.Value.lrBestRule == lrBestRule) + lReplaceNodes.Add(new KeyValuePair(kvpChild.Key, ltrChildChild)); + } + foreach (var kvpChild in lReplaceNodes) + { + dictSubNodes[kvpChild.Key] = kvpChild.Value; + kvpChild.Value.ltnParentNode = this; + } + } + } + #endregion + + #region Public Properties + public int TreeSize + { + get + { + int iCount = 1; + if (dictSubNodes != null) + { + foreach (var ltnChild in dictSubNodes.Values) + { + iCount += ltnChild.TreeSize; + } + } + return iCount; + } + } + public double Weight + { + get + { + return dWeight; + } + } + + #endregion + + #region Essential Class Functions (building model) + private void FindBestRules() + { + /* + * LINQ SPEED TEST (Slower than current metodology) + * + List leApplicable = new List(); + for (int iExm = iStart; iExm <= iEnd; iExm++) + if (elExamples[iExm].Rule.IsApplicableToGroup(sCondition.Length)) + leApplicable.Add(elExamples[iExm]); + + List> lBestRules = new List>(); + lBestRules.AddRange( + leApplicable. + GroupBy>( + le => le.Rule, + le => le.Weight, + (lr, enumDbl) => new KeyValuePair(lr, enumDbl.Aggregate((acc, curr) => acc + curr)) + ). 
+ OrderBy(kvpLrWght=>kvpLrWght.Value) + ); + + if (lBestRules.Count > 0) + lrBestRule = lBestRules[0].Key; + else { + lrBestRule = elExamples.Rules.DefaultRule; + + } + */ + dWeight = 0; + //calculate dWeight of whole node and calculates qualities for all rules + var dictApplicableRules = new Dictionary(); + //dictApplicableRules.Add(elExamples.Rules.DefaultRule, 0); + while (dictApplicableRules.Count == 0) + { + for (var iExm = iStart; iExm <= iEnd; iExm++) + { + var lr = elExamples[iExm].Rule; + var dExmWeight = elExamples[iExm].Weight; + dWeight += dExmWeight; + if (lr.IsApplicableToGroup(sCondition.Length)) + { + if (dictApplicableRules.ContainsKey(lr)) + dictApplicableRules[lr] += dExmWeight; + else + dictApplicableRules.Add(lr, dExmWeight); + } + } + //if none found then increase condition length or add some default appliable rule + if (dictApplicableRules.Count == 0) + { + if (this.sCondition.Length < iSimilarity) + this.sCondition = elExamples[iStart].Word.Substring(elExamples[iStart].Word.Length - (sCondition.Length + 1)); + else + //TODO preveri hevristiko, mogoce je bolje ce se doda default rule namesto rulea od starsa + dictApplicableRules.Add(ltnParentNode.lrBestRule, 0); + } + } + //TODO can optimize this step using sorted list (dont add if it's worse than the worst) + var lSortedRules = new List(); + foreach (var kvp in dictApplicableRules) + { + lSortedRules.Add(new RuleWeighted(kvp.Key, kvp.Value / dWeight)); + } + lSortedRules.Sort(); + + //keep just best iMaxRulesPerNode rules + var iNumRules = lSortedRules.Count; + if (lsett.iMaxRulesPerNode > 0) + iNumRules = Math.Min(lSortedRules.Count, lsett.iMaxRulesPerNode); + + aBestRules = new RuleWeighted[iNumRules]; + for (var iRule = 0; iRule < iNumRules; iRule++) + { + aBestRules[iRule] = lSortedRules[iRule]; + } + + //set best rule + lrBestRule = aBestRules[0].Rule; + + //TODO must check if this hevristics is OK (to privilige parent rule) + if (ltnParentNode != null) + { + for (int iRule = 0; iRule < 
lSortedRules.Count && + lSortedRules[iRule].Weight == lSortedRules[0].Weight; iRule++) + { + if (lSortedRules[iRule].Rule == ltnParentNode.lrBestRule) + { + lrBestRule = lSortedRules[iRule].Rule; + break; + } + } + } + } + + private void AddSubAll() + { + int iStartGroup = iStart; + var chCharPrev = '\0'; + var bSubGroupNeeded = false; + for (var iWrd = iStart; iWrd <= iEnd; iWrd++) + { + var sWord = elExamples[iWrd].Word; + var chCharThis = sWord.Length > iSimilarity ? sWord[sWord.Length - 1 - iSimilarity] : '\0'; + if (iWrd != iStart && chCharPrev != chCharThis) + { + if (bSubGroupNeeded) + { + AddSub(iStartGroup, iWrd - 1, chCharPrev); + bSubGroupNeeded = false; + } + iStartGroup = iWrd; + } + + //TODO check out bSubGroupNeeded when there are multiple posible rules (not just lrBestRule) + if (elExamples[iWrd].Rule != lrBestRule) + { + bSubGroupNeeded = true; + } + chCharPrev = chCharThis; + } + if (bSubGroupNeeded && iStartGroup != iStart) + { + AddSub(iStartGroup, iEnd, chCharPrev); + } + } + + private void AddSub(int iStart, int iEnd, char chChar) + { + var ltnSub = new LemmaTreeNode(lsett, elExamples, iStart, iEnd, this); + + //TODO - maybe not realy appropriate because loosing statisitcs from multiple possible rules + if (ltnSub.lrBestRule == lrBestRule && ltnSub.dictSubNodes == null) + return; + + if (dictSubNodes == null) + dictSubNodes = new Dictionary(); + dictSubNodes.Add(chChar, ltnSub); + } + #endregion + + #region Essential Class Functions (running model = lemmatizing) + public bool ConditionSatisfied(string sWord) + { + //if (bWholeWord) + // return sWord == sCondition; + //else + // return sWord.EndsWith(sCondition); + + var iDiff = sWord.Length - sCondition.Length; + if (iDiff < 0 || (bWholeWord && iDiff > 0)) + return false; + + var iWrdEnd = sCondition.Length - ltnParentNode.sCondition.Length - 1; + for (var iChar = 0; iChar < iWrdEnd; iChar++) + { + if (sCondition[iChar] != sWord[iChar + iDiff]) + return false; + } + return true; + } + public 
string Lemmatize(string sWord) + { + if (sWord.Length >= iSimilarity && dictSubNodes != null) + { + char chChar = sWord.Length > iSimilarity ? sWord[sWord.Length - 1 - iSimilarity] : '\0'; + if (dictSubNodes.ContainsKey(chChar) && dictSubNodes[chChar].ConditionSatisfied(sWord)) + return dictSubNodes[chChar].Lemmatize(sWord); + } + return lrBestRule.Lemmatize(sWord); + } + + #endregion + + #region Output Functions (ToString) + public override string ToString() + { + var sb = new StringBuilder(); + ToString(sb, 0); + return sb.ToString(); + } + + private void ToString(StringBuilder sb, int iLevel) + { + sb.Append(new string('\t', iLevel)); + sb.AppendFormat("Suffix=\"{0}{1}\"; ", bWholeWord ? "^" : string.Empty, sCondition); + sb.AppendFormat("Rule=\"{0}\"; ", lrBestRule); + sb.AppendFormat("Weight=\"{0}\"; ", dWeight); + if (aBestRules != null && aBestRules.Length > 0) + sb.AppendFormat("Cover={0}; ", aBestRules[0].Weight); + sb.Append("Rulles="); + if (aBestRules != null) + { + foreach (var rw in aBestRules) + sb.AppendFormat(" {0}", rw); + } + sb.Append("; "); + sb.AppendLine(); + if (dictSubNodes != null) + { + foreach (var ltnChild in dictSubNodes.Values) + { + ltnChild.ToString(sb, iLevel + 1); + } + } + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter binWrt) + { + binWrt.Write(dictSubNodes != null); + if (dictSubNodes != null) + { + binWrt.Write(dictSubNodes.Count); + foreach (var kvp in dictSubNodes) + { + binWrt.Write(kvp.Key); + kvp.Value.Serialize(binWrt); + } + } + binWrt.Write(iSimilarity); + binWrt.Write(sCondition); + binWrt.Write(bWholeWord); + binWrt.Write(lrBestRule.Signature); + binWrt.Write(aBestRules.Length); + for (var i = 0; i < aBestRules.Length; i++) + { + binWrt.Write(aBestRules[i].Rule.Signature); + binWrt.Write(aBestRules[i].Weight); + } + binWrt.Write(dWeight); + binWrt.Write(iStart); + binWrt.Write(iEnd); + } + + public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, 
ExampleList elExamples, LemmaTreeNode ltnParentNode) + { + this.lsett = lsett; + if (binRead.ReadBoolean()) + { + dictSubNodes = new Dictionary(); + var iCount = binRead.ReadInt32(); + for (var i = 0; i < iCount; i++) + { + var cKey = binRead.ReadChar(); + var ltrSub = new LemmaTreeNode(binRead, this.lsett, elExamples, this); + dictSubNodes.Add(cKey, ltrSub); + } + } + else + { + dictSubNodes = null; + } + this.ltnParentNode = ltnParentNode; + + iSimilarity = binRead.ReadInt32(); + sCondition = binRead.ReadString(); + bWholeWord = binRead.ReadBoolean(); + lrBestRule = elExamples.Rules[binRead.ReadString()]; + + var iCountBest = binRead.ReadInt32(); + aBestRules = new RuleWeighted[iCountBest]; + for (var i = 0; i < iCountBest; i++) + { + aBestRules[i] = + new RuleWeighted(elExamples.Rules[binRead.ReadString()], binRead.ReadDouble()); + } + dWeight = binRead.ReadDouble(); + iStart = binRead.ReadInt32(); + iEnd = binRead.ReadInt32(); + this.elExamples = elExamples; + } + public LemmaTreeNode(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode) + { + Deserialize(binRead, lsett, elExamples, ltnParentNode); + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + public void Save(Latino.BinarySerializer binWrt) { + binWrt.WriteBool(dictSubNodes != null); + if (dictSubNodes != null) { + binWrt.WriteInt(dictSubNodes.Count); + foreach (KeyValuePair kvp in dictSubNodes) { + binWrt.WriteChar(kvp.Key); + kvp.Value.Save(binWrt); + } + } + + binWrt.WriteInt(iSimilarity); + binWrt.WriteString(sCondition); + binWrt.WriteBool(bWholeWord); + + binWrt.WriteString(lrBestRule.Signature); + binWrt.WriteInt(aBestRules.Length); + for (int i = 0; i < aBestRules.Length; i++) { + binWrt.WriteString(aBestRules[i].Rule.Signature); + binWrt.WriteDouble(aBestRules[i].Weight); + } + binWrt.WriteDouble(dWeight); + + binWrt.WriteInt(iStart); + binWrt.WriteInt(iEnd); + } + public void Load(Latino.BinarySerializer binRead, 
LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode) { + this.lsett = lsett; + + if (binRead.ReadBool()) { + dictSubNodes = new Dictionary(); + int iCount = binRead.ReadInt(); + for (int i = 0; i < iCount; i++) { + char cKey = binRead.ReadChar(); + LemmaTreeNode ltrSub = new LemmaTreeNode(binRead, this.lsett, elExamples, this); + dictSubNodes.Add(cKey, ltrSub); + } + } + else + dictSubNodes = null; + + this.ltnParentNode = ltnParentNode; + + iSimilarity = binRead.ReadInt(); + sCondition = binRead.ReadString(); + bWholeWord = binRead.ReadBool(); + + lrBestRule = elExamples.Rules[binRead.ReadString()]; + + int iCountBest = binRead.ReadInt(); + aBestRules = new RuleWeighted[iCountBest]; + for (int i = 0; i < iCountBest; i++) + aBestRules[i] = new RuleWeighted(elExamples.Rules[binRead.ReadString()], binRead.ReadDouble()); + + dWeight = binRead.ReadDouble(); + + iStart = binRead.ReadInt(); + iEnd = binRead.ReadInt(); + this.elExamples = elExamples; + + } + public LemmaTreeNode(Latino.BinarySerializer binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode) { + Load(binRead, lsett, elExamples, ltnParentNode); + } +#endif + #endregion + + #region Other (Temporarly) + //TODO - this is temp function, remove it + public bool CheckConsistency() + { + var bReturn = true; + if (dictSubNodes != null) + foreach (var ltnChild in dictSubNodes.Values) + bReturn = bReturn && + ltnChild.CheckConsistency() && + ltnChild.sCondition.EndsWith(sCondition); + return bReturn; + } + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_Base/LemmaSharp/Classes/Lemmatizer.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_Base/LemmaSharp/Classes/Lemmatizer.cs new file mode 100644 index 0000000..b63632a --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_Base/LemmaSharp/Classes/Lemmatizer.cs @@ -0,0 +1,465 @@ +using System; +using System.Collections.Generic; +using System.Text; +using 
System.IO; +using System.Runtime.Serialization; +using System.IO.Compression; +using SevenZip; + +namespace LemmaSharp +{ + [Serializable] + public class Lemmatizer : ITrainableLemmatizer +#if LATINO + , Latino.ISerializable +#endif + { + + #region Private Variables + protected LemmatizerSettings lsett; + protected ExampleList elExamples; + protected LemmaTreeNode ltnRootNode; + protected LemmaTreeNode ltnRootNodeFront; + #endregion + + #region Constructor(s) + public Lemmatizer() : + this(new LemmatizerSettings()) + { } + + public Lemmatizer(LemmatizerSettings lsett) + { + this.lsett = lsett; + this.elExamples = new ExampleList(lsett); + this.ltnRootNode = null; + this.ltnRootNodeFront = null; + } + + public Lemmatizer(StreamReader srIn, string sFormat, LemmatizerSettings lsett) : this(lsett) + { + AddMultextFile(srIn, sFormat); + } + #endregion + + #region Private Properties + private LemmaTreeNode ltrRootNodeSafe + { + get + { + if (ltnRootNode == null) + BuildModel(); + return ltnRootNode; + } + } + private LemmaTreeNode ltrRootNodeFrontSafe + { + get + { + if (ltnRootNodeFront == null && lsett.bBuildFrontLemmatizer) + BuildModel(); + return ltnRootNodeFront; + } + } + #endregion + + #region Public Properties + public LemmatizerSettings Settings + { + get + { + return lsett.CloneDeep(); + } + } + public ExampleList Examples + { + get + { + return elExamples; + } + } + public RuleList Rules + { + get + { + return elExamples.Rules; + } + } + public LemmaTreeNode RootNode + { + get + { + return ltrRootNodeSafe; + } + } + public LemmaTreeNode RootNodeFront + { + get + { + return ltrRootNodeFrontSafe; + } + } + public ILemmatizerModel Model + { + get + { + return ltrRootNodeSafe; + } + } + #endregion + + #region Essential Class Functions (adding examples to repository) + public void AddMultextFile(StreamReader srIn, string sFormat) + { + this.elExamples.AddMultextFile(srIn, sFormat); + ltnRootNode = null; + } + public void AddExample(string sWord, string sLemma) + { 
+ AddExample(sWord, sLemma, 1, null); + } + public void AddExample(string sWord, string sLemma, double dWeight) + { + AddExample(sWord, sLemma, dWeight, null); + } + public void AddExample(string sWord, string sLemma, double dWeight, string sMsd) + { + elExamples.AddExample(sWord, sLemma, dWeight, sMsd); + ltnRootNode = null; + } + public void DropExamples() + { + elExamples.DropExamples(); + } + public void FinalizeAdditions() + { + elExamples.FinalizeAdditions(); + } + #endregion + + #region Essential Class Functions (building model & lemmatizing) + public void BuildModel() + { + if (ltnRootNode != null) + return; + + if (!lsett.bBuildFrontLemmatizer) + { + //TODO remove: elExamples.FinalizeAdditions(); + elExamples.FinalizeAdditions(); + ltnRootNode = new LemmaTreeNode(lsett, elExamples); + } + else + { + ltnRootNode = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(false)); + ltnRootNodeFront = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(true)); + } + } + + public string Lemmatize(string sWord) + { + if (!lsett.bBuildFrontLemmatizer) + { + return ltrRootNodeSafe.Lemmatize(sWord); + } + var sWordFront = LemmaExample.StringReverse(sWord); + var sLemmaFront = ltrRootNodeFrontSafe.Lemmatize(sWordFront); + var sWordRear = LemmaExample.StringReverse(sLemmaFront); + return ltrRootNodeSafe.Lemmatize(sWordRear); + } + #endregion + + #region Serialization Functions (ISerializable) + public void GetObjectData(SerializationInfo info, StreamingContext context) + { + info.AddValue("lsett", lsett); + info.AddValue("elExamples", elExamples); + } + + public Lemmatizer(SerializationInfo info, StreamingContext context) : this() + { + lsett = (LemmatizerSettings)info.GetValue("lsett", typeof(LemmatizerSettings)); + elExamples = (ExampleList)info.GetValue("elExamples", typeof(ExampleList)); + this.BuildModel(); + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter binWrt, bool bSerializeExamples) + { + 
lsett.Serialize(binWrt); + binWrt.Write(bSerializeExamples); + elExamples.Serialize(binWrt, bSerializeExamples, false); + + if (!bSerializeExamples) + { + elExamples.GetFrontRearExampleList(false).Serialize(binWrt, bSerializeExamples, false); + elExamples.GetFrontRearExampleList(true).Serialize(binWrt, bSerializeExamples, false); + } + ltnRootNode.Serialize(binWrt); + if (lsett.bBuildFrontLemmatizer) + ltnRootNodeFront.Serialize(binWrt); + } + + public void Deserialize(BinaryReader binRead) + { + lsett = new LemmatizerSettings(binRead); + + var bSerializeExamples = binRead.ReadBoolean(); + elExamples = new ExampleList(binRead, lsett); + + ExampleList elExamplesRear; + ExampleList elExamplesFront; + + if (bSerializeExamples) + { + elExamplesRear = elExamples.GetFrontRearExampleList(false); + elExamplesFront = elExamples.GetFrontRearExampleList(true); + } + else + { + elExamplesRear = new ExampleList(binRead, lsett); + elExamplesFront = new ExampleList(binRead, lsett); + } + + if (!lsett.bBuildFrontLemmatizer) + { + ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null); + } + else + { + ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamplesRear, null); + ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, elExamplesFront, null); + } + } + + //Do not change the order!!! (If new compression algorithms are added, otherwise you will not be able to load old files.) 
+ public enum Compression + { + None, + Deflate, + LZMA + } + + public Lemmatizer(BinaryReader binRead) + { + var compr = (Compression)binRead.ReadByte(); + if (compr == Compression.None) + Deserialize(binRead); + else + throw new Exception("Loading lemmatizer with binary reader on uncompressed stream is not supported."); + } + + public Lemmatizer(Stream streamIn) + { + Deserialize(streamIn); + } + + public void Serialize(Stream streamOut) + { + Serialize(streamOut, true, Compression.None); + } + public void Serialize(Stream streamOut, bool bSerializeExamples) + { + Serialize(streamOut, bSerializeExamples, Compression.None); + } + public void Serialize(Stream streamOut, bool bSerializeExamples, Compression compress) + { + streamOut.WriteByte((byte)compress); + switch (compress) + { + case Compression.None: + SerializeNone(streamOut, bSerializeExamples); + break; + case Compression.Deflate: + SerializeDeflate(streamOut, bSerializeExamples); + break; + case Compression.LZMA: + SerializeLZMA(streamOut, bSerializeExamples); + break; + default: + break; + } + } + + private void SerializeNone(Stream streamOut, bool bSerializeExamples) + { + using (var binWrt = new BinaryWriter(streamOut)) + { + this.Serialize(binWrt, bSerializeExamples); + } + } + + private void SerializeDeflate(Stream streamOut, bool bSerializeExamples) + { + using (var streamOutNew = new DeflateStream(streamOut, CompressionMode.Compress, true)) + { + using (var binWrt = new BinaryWriter(streamOutNew)) + { + this.Serialize(binWrt, bSerializeExamples); + binWrt.Flush(); + binWrt.Close(); + } + } + } + + private void SerializeLZMA(Stream streamOut, bool bSerializeExamples) + { + CoderPropID[] propIDs = + { + CoderPropID.DictionarySize, + CoderPropID.PosStateBits, + CoderPropID.LitContextBits, + CoderPropID.LitPosBits, + CoderPropID.Algorithm, + CoderPropID.NumFastBytes, + CoderPropID.MatchFinder, + CoderPropID.EndMarker + }; + + Int32 dictionary = 1 << 23; + Int32 posStateBits = 2; + Int32 litContextBits 
= 3; // for normal files + Int32 litPosBits = 0; + Int32 algorithm = 2; + Int32 numFastBytes = 128; + var mf = "bt4"; + var eos = false; + + object[] properties = + { + (Int32)(dictionary), + (Int32)(posStateBits), + (Int32)(litContextBits), + (Int32)(litPosBits), + (Int32)(algorithm), + (Int32)(numFastBytes), + mf, + eos + }; + + using (var msTemp = new MemoryStream()) + { + using (var binWrtTemp = new BinaryWriter(msTemp)) + { + this.Serialize(binWrtTemp, bSerializeExamples); + msTemp.Position = 0; + var encoder = new SevenZip.Compression.LZMA.Encoder(); + encoder.SetCoderProperties(propIDs, properties); + encoder.WriteCoderProperties(streamOut); + var fileSize = msTemp.Length; + for (int i = 0; i < 8; i++) + { + streamOut.WriteByte((Byte)(fileSize >> (8 * i))); + } + encoder.Code(msTemp, streamOut, -1, -1, null); + binWrtTemp.Close(); + encoder = null; + } + msTemp.Close(); + } + } + + public void Deserialize(Stream streamIn) + { + var compr = (Compression)streamIn.ReadByte(); + using (var streamInNew = Decompress(streamIn, compr)) + { + using (var br = new BinaryReader(streamInNew)) + { + Deserialize(br); + } + } + } + + private Stream Decompress(Stream streamIn, Compression compress) + { + Stream streamInNew; + switch (compress) + { + case Compression.None: + default: + streamInNew = streamIn; + break; + case Compression.Deflate: + streamInNew = new DeflateStream(streamIn, CompressionMode.Decompress); + break; + case Compression.LZMA: + streamInNew = DecompressLZMA(streamIn); + break; + } + return streamInNew; + } + + private Stream DecompressLZMA(Stream streamIn) + { + var properties = new byte[5]; + if (streamIn.Read(properties, 0, 5) != 5) + throw new Exception("input .lzma is too short"); + var decoder = new SevenZip.Compression.LZMA.Decoder(); + decoder.SetDecoderProperties(properties); + + long outSize = 0; + for (var i = 0; i < 8; i++) + { + var v = streamIn.ReadByte(); + if (v < 0) + throw (new Exception("Can't Read 1")); + outSize |= ((long)(byte)v) 
<< (8 * i); + } + var compressedSize = streamIn.Length - streamIn.Position; + var outStream = new MemoryStream(); + decoder.Code(streamIn, outStream, compressedSize, outSize, null); + outStream.Seek(0, 0); + decoder = null; + return outStream; + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + + public void Save(Latino.BinarySerializer binWrt) { + lsett.Save(binWrt); + + elExamples.Save(binWrt, true, false); + + ltnRootNode.Save(binWrt); + if (lsett.bBuildFrontLemmatizer) + ltnRootNodeFront.Save(binWrt); + } + + public void Load(Latino.BinarySerializer binRead) { + lsett = new LemmatizerSettings(binRead); + elExamples = new ExampleList(binRead, lsett); + if (!lsett.bBuildFrontLemmatizer) { + ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null); + } + else { + ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples.GetFrontRearExampleList(false) , null); + ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, elExamples.GetFrontRearExampleList(true), null); + } + } + + public Lemmatizer(Latino.BinarySerializer binRead) { + Load(binRead); + } + + public void Save(Stream streamOut) { + Latino.BinarySerializer binWrt = new Latino.BinarySerializer(streamOut); + this.Save(binWrt); + binWrt.Close(); + } + public void Load(Stream streamIn) { + Latino.BinarySerializer binRead = new Latino.BinarySerializer(streamIn); + Load(binRead); + binRead.Close(); + } + + public Lemmatizer(Stream streamIn, string sDummy) { + Load(streamIn); + } + +#endif + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_Base/LemmaSharp/Classes/LemmatizerSettings.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_Base/LemmaSharp/Classes/LemmatizerSettings.cs new file mode 100644 index 0000000..04ed7a0 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_Base/LemmaSharp/Classes/LemmatizerSettings.cs @@ -0,0 +1,143 @@ +using System; +using System.IO; +using System.Runtime.Serialization; + +namespace LemmaSharp +{ 
+ /// + /// These are the lemmagen algorithm settings that affect speed/power of the learning and lemmatizing algorithm. + /// TODO this class will be probbably removed in the future. + /// + [Serializable] + public class LemmatizerSettings : ISerializable + { + #region Constructor(s) + public LemmatizerSettings() + { + } + #endregion + + #region Sub-Structures + /// + /// How algorithm considers msd tags. + /// + public enum MsdConsideration + { + /// + /// Completely ignores mds tags (join examples with different tags and sum their weihgts). + /// + Ignore, + /// + /// Same examples with different msd's are not considered equal and joined. + /// + Distinct, + /// + /// Joins examples with different tags (concatenates all msd tags). + /// + JoinAll, + /// + /// Joins examples with different tags (concatenates just distinct msd tags - somehow slower). + /// + JoinDistinct, + /// + /// Joins examples with different tags (new tag is the left to right substring that all joined examples share). + /// + JoinSameSubstring + } + #endregion + + #region Public Variables + /// + /// True if from string should be included in rule identifier ([from]->[to]). False if just length of from string is used ([#len]->[to]). + /// + public bool bUseFromInRules = true; + /// + /// Specification how algorithm considers msd tags. + /// + public MsdConsideration eMsdConsider = MsdConsideration.Distinct; + /// + /// How many of the best rules are kept in memory for each node. Zero means unlimited. 
+ /// + public int iMaxRulesPerNode = 0; + /// + /// If true, than build proccess uses few more hevristics to build first left to right lemmatizer (lemmatizes front of the word) + /// + public bool bBuildFrontLemmatizer = false; + #endregion + + #region Cloneable functions + public LemmatizerSettings CloneDeep() + { + return new LemmatizerSettings() + { + bUseFromInRules = this.bUseFromInRules, + eMsdConsider = this.eMsdConsider, + iMaxRulesPerNode = this.iMaxRulesPerNode, + bBuildFrontLemmatizer = this.bBuildFrontLemmatizer + }; + } + #endregion + + #region Serialization Functions (ISerializable) + public void GetObjectData(SerializationInfo info, StreamingContext context) + { + info.AddValue("bUseFromInRules", bUseFromInRules); + info.AddValue("eMsdConsider", eMsdConsider); + info.AddValue("iMaxRulesPerNode", iMaxRulesPerNode); + info.AddValue("bBuildFrontLemmatizer", bBuildFrontLemmatizer); + } + public LemmatizerSettings(SerializationInfo info, StreamingContext context) + { + bUseFromInRules = info.GetBoolean("bUseFromInRules"); + eMsdConsider = (MsdConsideration)info.GetValue("eMsdConsider", typeof(MsdConsideration)); + iMaxRulesPerNode = info.GetInt32("iMaxRulesPerNode"); + bBuildFrontLemmatizer = info.GetBoolean("bBuildFrontLemmatizer"); + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter binWrt) + { + binWrt.Write(bUseFromInRules); + binWrt.Write((int)eMsdConsider); + binWrt.Write(iMaxRulesPerNode); + binWrt.Write(bBuildFrontLemmatizer); + } + public void Deserialize(BinaryReader binRead) + { + bUseFromInRules = binRead.ReadBoolean(); + eMsdConsider = (MsdConsideration)binRead.ReadInt32(); + iMaxRulesPerNode = binRead.ReadInt32(); + bBuildFrontLemmatizer = binRead.ReadBoolean(); + } + public LemmatizerSettings(System.IO.BinaryReader binRead) + { + this.Deserialize(binRead); + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + + public void Save(Latino.BinarySerializer binWrt) { + 
using System.Collections.Generic;
using System.IO;

namespace LemmaSharp
{
    /// <summary>
    /// A dictionary of lemmatization rules keyed by their signature. Every list
    /// also owns a default (identity) rule that is created on construction.
    /// </summary>
    public class RuleList : Dictionary<string, LemmaRule>
    {
        #region Private Variables
        private LemmatizerSettings lsett;
        private LemmaRule lrDefaultRule;
        #endregion

        #region Constructor(s)
        public RuleList(LemmatizerSettings lsett)
        {
            this.lsett = lsett;
            lrDefaultRule = AddRule(new LemmaRule("", "", 0, lsett));
        }
        #endregion

        #region Public Properties
        /// <summary>The identity rule ("" -> "") present in every rule list.</summary>
        public LemmaRule DefaultRule
        {
            get { return lrDefaultRule; }
        }
        #endregion

        #region Essential Class Functions
        /// <summary>Derives a rule from the given example and registers it.</summary>
        public LemmaRule AddRule(LemmaExample le)
        {
            return AddRule(new LemmaRule(le.Word, le.Lemma, this.Count, lsett));
        }
        /// <summary>
        /// Registers the candidate rule unless a rule with the same signature is
        /// already present; returns the rule actually stored in the list.
        /// </summary>
        private LemmaRule AddRule(LemmaRule lrCandidate)
        {
            LemmaRule lrExisting;
            if (this.TryGetValue(lrCandidate.Signature, out lrExisting))
                return lrExisting;
            this.Add(lrCandidate.Signature, lrCandidate);
            return lrCandidate;
        }
        #endregion

        #region Serialization Functions (Binary)
        public void Serialize(BinaryWriter binWrt, bool bThisTopObject)
        {
            // metadata
            binWrt.Write(bThisTopObject);

            // reference types are written only by the top-level object
            if (bThisTopObject)
                lsett.Serialize(binWrt);

            // list items, keyed by signature
            binWrt.Write(this.Count);
            foreach (var kvp in this)
            {
                binWrt.Write(kvp.Key);
                kvp.Value.Serialize(binWrt, false);
            }

            // the default rule is one of the entries above; store only its id
            binWrt.Write(lrDefaultRule.Signature);
        }

        public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett)
        {
            // metadata
            bool bThisTopObject = binRead.ReadBoolean();

            // reference types are loaded only by the top-level object
            this.lsett = bThisTopObject ? new LemmatizerSettings(binRead) : lsett;

            // list items
            this.Clear();
            int iCount = binRead.ReadInt32();
            for (int iId = 0; iId < iCount; iId++)
            {
                string sKey = binRead.ReadString();
                this.Add(sKey, new LemmaRule(binRead, this.lsett));
            }

            // re-link the default rule from its stored id
            lrDefaultRule = this[binRead.ReadString()];
        }

        public RuleList(System.IO.BinaryReader binRead, LemmatizerSettings lsett)
        {
            this.Deserialize(binRead, lsett);
        }
        #endregion

        #region Serialization Functions (Latino)
#if LATINO

        public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject) {
            // metadata
            binWrt.WriteBool(bThisTopObject);

            // reference types are written only by the top-level object
            if (bThisTopObject)
                lsett.Save(binWrt);

            // list items, keyed by signature
            binWrt.WriteInt(this.Count);
            foreach (KeyValuePair<string, LemmaRule> kvp in this) {
                binWrt.WriteString(kvp.Key);
                kvp.Value.Save(binWrt, false);
            }

            // the default rule is one of the entries above; store only its id
            binWrt.WriteString(lrDefaultRule.Signature);
        }
        public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
            // metadata
            bool bThisTopObject = binRead.ReadBool();

            // reference types are loaded only by the top-level object
            this.lsett = bThisTopObject ? new LemmatizerSettings(binRead) : lsett;

            // list items
            this.Clear();
            int iCount = binRead.ReadInt();
            for (int iId = 0; iId < iCount; iId++) {
                string sKey = binRead.ReadString();
                this.Add(sKey, new LemmaRule(binRead, this.lsett));
            }

            // re-link the default rule from its stored id
            lrDefaultRule = this[binRead.ReadString()];
        }
        public RuleList(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
            Load(binRead, lsett);
        }

#endif
        #endregion
    }
}
using System.Runtime.Serialization;

namespace LemmaSharp
{
    /// <summary>
    /// A lemmatizer maps an inflected word form onto its canonical dictionary
    /// form (lemma). Implementations are serializable so that trained models
    /// can be persisted and reloaded.
    /// </summary>
    public interface ILemmatizer : ISerializable
    {
        /// <summary>Returns the lemma of <paramref name="sWord"/>.</summary>
        /// <param name="sWord">The word form to lemmatize.</param>
        string Lemmatize(string sWord);
    }
}
namespace LemmaSharp
{
    /// <summary>
    /// A lemmatizer that can be trained: examples (word, lemma[, weight[, msd]])
    /// are accumulated and then compiled into a model via <see cref="BuildModel"/>.
    /// </summary>
    public interface ITrainableLemmatizer : ILemmatizer
    {
        /// <summary>The training examples collected so far.</summary>
        ExampleList Examples { get; }
        /// <summary>The model built from the examples.
        /// NOTE(review): value before <see cref="BuildModel"/> is called is not
        /// verifiable from this view — confirm against implementations.</summary>
        ILemmatizerModel Model { get; }
        /// <summary>Adds an example with default weight.</summary>
        void AddExample(string sWord, string sLemma);
        /// <summary>Adds an example with an explicit weight.</summary>
        void AddExample(string sWord, string sLemma, double dWeight);
        /// <summary>Adds an example with an explicit weight and morphosyntactic description.</summary>
        void AddExample(string sWord, string sLemma, double dWeight, string sMsd);
        /// <summary>Compiles the accumulated examples into the lemmatization model.</summary>
        void BuildModel();
    }
}
/*==========================================================================;
 *
 *  (c) 2004-08 JSI.  All rights reserved.
 *
 *  File:          BinarySerializer.cs
 *  Version:       1.0
 *  Desc:          Binary serializer
 *  Author:        Miha Grcar
 *  Created on:    Oct-2004
 *  Last modified: May-2008
 *  Revision:      May-2008
 *
 ***************************************************************************/

// Remark: Use this file as Latino compatibility checker. When it is included in
// the project it defines symbol LATINO, that should enable all Latino specific
// serialization functions. When excluded, this code will not be created and also
// the following Latino namespace will not be added to the project.

using System;
using System.Runtime.InteropServices;
using System.Collections.Generic;
using System.Reflection;
using System.Text;
using System.IO;

#if LATINO

namespace Latino
{
    /* .-----------------------------------------------------------------------
       |
       |  Interface ISerializable
       |
       '-----------------------------------------------------------------------
    */
    public interface ISerializable
    {
        // *** note that you need to implement a constructor that loads the
        // instance if the class implements Latino.ISerializable
        void Save(Latino.BinarySerializer writer);
    }

    /* .-----------------------------------------------------------------------
       |
       |  Class BinarySerializer
       |
       |  Reads/writes primitive values, strings and Latino.ISerializable
       |  objects from/to an underlying Stream in a compact binary format.
       |
       '-----------------------------------------------------------------------
    */
    public class BinarySerializer
    {
        // Maps between assembly-qualified type names and short aliases used on
        // the wire (written by WriteType / read by ReadType).
        private static Dictionary<string, string> m_full_to_short_type_name
            = new Dictionary<string, string>();
        private static Dictionary<string, string> m_short_to_full_type_name
            = new Dictionary<string, string>();
        private Stream m_stream;
        private string m_data_dir
            = ".";

        private static void RegisterTypeName(string full_type_name, string short_type_name)
        {
            m_full_to_short_type_name.Add(full_type_name, short_type_name);
            m_short_to_full_type_name.Add(short_type_name, full_type_name);
        }

        private static string GetFullTypeName(string short_type_name)
        {
            return m_short_to_full_type_name.ContainsKey(short_type_name)
                ? m_short_to_full_type_name[short_type_name] : short_type_name;
        }

        private static string GetShortTypeName(string full_type_name)
        {
            return m_full_to_short_type_name.ContainsKey(full_type_name)
                ? m_full_to_short_type_name[full_type_name] : full_type_name;
        }

        static BinarySerializer()
        {
            RegisterTypeName(typeof(bool).AssemblyQualifiedName, "b");
            RegisterTypeName(typeof(byte).AssemblyQualifiedName, "ui1");
            RegisterTypeName(typeof(sbyte).AssemblyQualifiedName, "i1");
            RegisterTypeName(typeof(char).AssemblyQualifiedName, "c");
            RegisterTypeName(typeof(double).AssemblyQualifiedName, "f8");
            RegisterTypeName(typeof(float).AssemblyQualifiedName, "f4");
            RegisterTypeName(typeof(int).AssemblyQualifiedName, "i4");
            RegisterTypeName(typeof(uint).AssemblyQualifiedName, "ui4");
            RegisterTypeName(typeof(long).AssemblyQualifiedName, "i8");
            RegisterTypeName(typeof(ulong).AssemblyQualifiedName, "ui8");
            RegisterTypeName(typeof(short).AssemblyQualifiedName, "i2");
            RegisterTypeName(typeof(ushort).AssemblyQualifiedName, "ui2");
            RegisterTypeName(typeof(string).AssemblyQualifiedName, "s");
        }

        public BinarySerializer(Stream stream)
        {
            //Utils.ThrowException(stream == null ? new ArgumentNullException("stream") : null);
            m_stream = stream;
        }

        public BinarySerializer()
        {
            m_stream = new MemoryStream();
        }

        public BinarySerializer(string file_name, FileMode file_mode)
        {
            m_stream = new FileStream(file_name, file_mode); // throws ArgumentException, NotSupportedException, ArgumentNullException, SecurityException, FileNotFoundException, IOException, DirectoryNotFoundException, PathTooLongException, ArgumentOutOfRangeException
        }

        // *** Reading ***

        // BUGFIX: Stream.Read may return fewer bytes than requested (its return
        // value was previously ignored); loop until the buffer is filled or the
        // stream ends. A premature end leaves the remaining bytes zero, matching
        // the original's non-throwing behavior.
        private byte[] ReadBytesExact(int count)
        {
            byte[] buffer = new byte[count];
            int offset = 0;
            while (offset < count)
            {
                int read = m_stream.Read(buffer, offset, count - offset); // throws IOException, NotSupportedException, ObjectDisposedException
                if (read <= 0)
                    break;
                offset += read;
            }
            return buffer;
        }

        // Read<T>() is directly or indirectly called from several methods thus
        // exceptions thrown here can also be thrown in all those methods.
        private byte[] Read<T>()
        {
            int sz = Marshal.SizeOf(typeof(T));
            return ReadBytesExact(sz);
        }

        public bool ReadBool()
        {
            return ReadByte() != 0;
        }

        // ReadByte() is directly or indirectly called from several methods thus
        // exceptions thrown here can also be thrown in all those methods.
        public byte ReadByte()
        {
            int val = m_stream.ReadByte(); // throws NotSupportedException, ObjectDisposedException
            //Utils.ThrowException(val < 0 ? new EndOfStreamException() : null);
            return (byte)val;
        }

        public sbyte ReadSByte()
        {
            return (sbyte)ReadByte();
        }

        private char ReadChar8()
        {
            return (char)ReadByte();
        }

        private char ReadChar16()
        {
            return BitConverter.ToChar(Read<char>(), 0);
        }

        public char ReadChar()
        {
            return ReadChar16();
        }

        public double ReadDouble()
        {
            return BitConverter.ToDouble(Read<double>(), 0);
        }

        public float ReadFloat()
        {
            return BitConverter.ToSingle(Read<float>(), 0);
        }

        public int ReadInt()
        {
            return BitConverter.ToInt32(Read<int>(), 0);
        }

        public uint ReadUInt()
        {
            return BitConverter.ToUInt32(Read<uint>(), 0);
        }

        public long ReadLong()
        {
            return BitConverter.ToInt64(Read<long>(), 0);
        }

        public ulong ReadULong()
        {
            return BitConverter.ToUInt64(Read<ulong>(), 0);
        }

        public short ReadShort()
        {
            return BitConverter.ToInt16(Read<short>(), 0);
        }

        public ushort ReadUShort()
        {
            return BitConverter.ToUInt16(Read<ushort>(), 0);
        }

        // Length-prefixed ASCII string; negative length encodes null.
        private string ReadString8()
        {
            int len = ReadInt();
            if (len < 0) { return null; }
            byte[] buffer = ReadBytesExact(len);
            return Encoding.ASCII.GetString(buffer);
        }

        // Length-prefixed UTF-16 string; length counts characters, not bytes.
        private string ReadString16()
        {
            int len = ReadInt();
            if (len < 0) { return null; }
            byte[] buffer = ReadBytesExact(len * 2);
            return Encoding.Unicode.GetString(buffer);
        }

        public string ReadString()
        {
            return ReadString16(); // throws exceptions (see ReadString16())
        }

        public Type ReadType()
        {
            string type_name = ReadString8(); // throws exceptions (see ReadString8())
            //Utils.ThrowException(type_name == null ? new InvalidDataException() : null);
            return Type.GetType(GetFullTypeName(type_name)); // throws TargetInvocationException, ArgumentException, TypeLoadException, FileNotFoundException, FileLoadException, BadImageFormatException
        }

        public ValueType ReadValue(Type type)
        {
            //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null);
            //Utils.ThrowException(!type.IsValueType ? new InvalidArgumentValueException("type") : null);
            if (type == typeof(bool))
            {
                return ReadBool();
            }
            else if (type == typeof(byte))
            {
                return ReadByte();
            }
            else if (type == typeof(sbyte))
            {
                return ReadSByte();
            }
            else if (type == typeof(char))
            {
                return ReadChar();
            }
            else if (type == typeof(double))
            {
                return ReadDouble();
            }
            else if (type == typeof(float))
            {
                return ReadFloat();
            }
            else if (type == typeof(int))
            {
                return ReadInt();
            }
            else if (type == typeof(uint))
            {
                return ReadUInt();
            }
            else if (type == typeof(long))
            {
                return ReadLong();
            }
            else if (type == typeof(ulong))
            {
                return ReadULong();
            }
            else if (type == typeof(short))
            {
                return ReadShort();
            }
            else if (type == typeof(ushort))
            {
                return ReadUShort();
            }
            else if (typeof(Latino.ISerializable).IsAssignableFrom(type))
            {
                // Latino.ISerializable implementers must provide a ctor taking BinarySerializer.
                ConstructorInfo cxtor = type.GetConstructor(new Type[] { typeof(Latino.BinarySerializer) });
                //Utils.ThrowException(cxtor == null ? new ArgumentNotSupportedException("type") : null);
                return (ValueType)cxtor.Invoke(new object[] { this }); // throws MemberAccessException, MethodAccessException, TargetInvocationException, NotSupportedException, SecurityException
            }
            else
            {
                //throw new ArgumentNotSupportedException("type");
                throw new Exception("type");
            }
        }

        public T ReadValue<T>()
        {
            return (T)(object)ReadValue(typeof(T)); // throws exceptions (see ReadValue(Type type))
        }

        public object ReadObject(Type type)
        {
            //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null);
            // Tag byte: 0 = null, 1 = declared type, 2 = actual type follows.
            switch (ReadByte())
            {
                case 0:
                    return null;
                case 1:
                    break;
                case 2:
                    Type type_0 = ReadType(); // throws exceptions (see ReadType())
                    //Utils.ThrowException(type_0 == null ? new TypeLoadException() : null);
                    //Utils.ThrowException(!type.IsAssignableFrom(type_0) ? new InvalidArgumentValueException("type") : null);
                    type = type_0;
                    break;
                default:
                    throw new InvalidDataException();
            }
            if (type == typeof(string))
            {
                return ReadString();
            }
            else if (typeof(Latino.ISerializable).IsAssignableFrom(type))
            {
                ConstructorInfo cxtor = type.GetConstructor(new Type[] { typeof(Latino.BinarySerializer) });
                //Utils.ThrowException(cxtor == null ? new ArgumentNotSupportedException("type") : null);
                return cxtor.Invoke(new object[] { this }); // throws MemberAccessException, MethodAccessException, TargetInvocationException, NotSupportedException, SecurityException
            }
            else if (type.IsValueType)
            {
                return ReadValue(type); // throws exceptions (see ReadValue(Type type))
            }
            else
            {
                //throw new InvalidArgumentValueException("type");
                throw new Exception("type");
            }
        }

        public T ReadObject<T>()
        {
            return (T)ReadObject(typeof(T)); // throws exceptions (see ReadObject(Type type))
        }

        public object ReadValueOrObject(Type type)
        {
            //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null);
            if (type.IsValueType)
            {
                return ReadValue(type); // throws exceptions (see ReadValue(Type type))
            }
            else
            {
                return ReadObject(type); // throws exceptions (see ReadObject(Type type))
            }
        }

        public T ReadValueOrObject<T>()
        {
            return (T)ReadValueOrObject(typeof(T)); // throws exceptions (see ReadValueOrObject(Type type))
        }

        // *** Writing ***

        // Write(byte[] data) is directly or indirectly called from several methods
        // thus exceptions thrown here can also be thrown in all those methods.
        private void Write(byte[] data)
        {
            m_stream.Write(data, 0, data.Length); // throws IOException, NotSupportedException, ObjectDisposedException
        }

        public void WriteBool(bool val)
        {
            WriteByte(val ? (byte)1 : (byte)0);
        }

        // WriteByte(byte val) is directly or indirectly called from several methods
        // thus exceptions thrown here can also be thrown in all those methods.
        public void WriteByte(byte val)
        {
            m_stream.WriteByte(val); // throws IOException, NotSupportedException, ObjectDisposedException
        }

        public void WriteSByte(sbyte val)
        {
            WriteByte((byte)val);
        }

        private void WriteChar8(char val)
        {
            WriteByte(Encoding.ASCII.GetBytes(new char[] { val })[0]);
        }

        private void WriteChar16(char val)
        {
            Write(BitConverter.GetBytes((ushort)val));
        }

        public void WriteChar(char val)
        {
            WriteChar16(val);
        }

        public void WriteDouble(double val)
        {
            Write(BitConverter.GetBytes(val));
        }

        public void WriteFloat(float val)
        {
            Write(BitConverter.GetBytes(val));
        }

        public void WriteInt(int val)
        {
            Write(BitConverter.GetBytes(val));
        }

        public void WriteUInt(uint val)
        {
            Write(BitConverter.GetBytes(val));
        }

        public void WriteLong(long val)
        {
            Write(BitConverter.GetBytes(val));
        }

        public void WriteULong(ulong val)
        {
            Write(BitConverter.GetBytes(val));
        }

        public void WriteShort(short val)
        {
            Write(BitConverter.GetBytes(val));
        }

        public void WriteUShort(ushort val)
        {
            Write(BitConverter.GetBytes(val));
        }

        private void WriteString8(string val)
        {
            if (val == null) { WriteInt(-1); return; }
            WriteInt(val.Length);
            Write(Encoding.ASCII.GetBytes(val));
        }

        private void WriteString16(string val)
        {
            if (val == null) { WriteInt(-1); return; }
            WriteInt(val.Length);
            Write(Encoding.Unicode.GetBytes(val));
        }

        public void WriteString(string val)
        {
            WriteString16(val);
        }

        public void WriteValue(ValueType val)
        {
            if (val is bool)
            {
                WriteBool((bool)val);
            }
            else if (val is byte)
            {
                WriteByte((byte)val);
            }
            else if (val is sbyte)
            {
                WriteSByte((sbyte)val);
            }
            else if (val is char)
            {
                WriteChar((char)val);
            }
            else if (val is double)
            {
                WriteDouble((double)val);
            }
            else if (val is float)
            {
                WriteFloat((float)val);
            }
            else if (val is int)
            {
                WriteInt((int)val);
            }
            else if (val is uint)
            {
                WriteUInt((uint)val);
            }
            else if (val is long)
            {
                WriteLong((long)val);
            }
            else if (val is ulong)
            {
                WriteULong((ulong)val);
            }
            else if (val is short)
            {
                WriteShort((short)val);
            }
            else if (val is ushort)
            {
                WriteUShort((ushort)val);
            }
            else if (val is Latino.ISerializable)
            {
                ((Latino.ISerializable)val).Save(this); // throws serialization-related exceptions
            }
            else
            {
                // BUGFIX: previously the unsupported type was silently skipped,
                // producing a stream that cannot be read back symmetrically.
                // Mirrors the Exception-based fallback used in ReadValue.
                throw new Exception("val");
            }
        }

        public void WriteObject(Type type, object obj)
        {
            //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null);
            //Utils.ThrowException((obj != null && !type.IsAssignableFrom(obj.GetType())) ? new ArgumentTypeException("obj") : null);
            if (obj == null)
            {
                WriteByte(0);
            }
            else
            {
                // Tag byte: 1 = runtime type equals declared type, 2 = actual type follows.
                Type obj_type = obj.GetType();
                if (obj_type == type)
                {
                    WriteByte(1);
                }
                else
                {
                    WriteByte(2);
                    WriteType(obj_type);
                }
                if (obj is string)
                {
                    WriteString((string)obj);
                }
                else if (obj is Latino.ISerializable)
                {
                    ((Latino.ISerializable)obj).Save(this); // throws serialization-related exceptions
                }
                else if (obj is ValueType)
                {
                    WriteValue((ValueType)obj); // throws exceptions (see WriteValue(ValueType val))
                }
                else
                {
                    // BUGFIX: previously silently skipped (see WriteValue).
                    throw new Exception("obj");
                }
            }
        }

        public void WriteObject<T>(T obj)
        {
            WriteObject(typeof(T), obj); // throws exceptions (see WriteObject(Type type, object obj))
        }

        public void WriteValueOrObject(Type type, object obj)
        {
            //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null);
            //Utils.ThrowException(!type.IsAssignableFrom(obj.GetType()) ? new ArgumentTypeException("obj") : null);
            if (type.IsValueType)
            {
                WriteValue((ValueType)obj); // throws exceptions (see WriteValue(ValueType val))
            }
            else
            {
                WriteObject(type, obj); // throws exceptions (see WriteObject(Type type, object obj))
            }
        }

        public void WriteValueOrObject<T>(T obj)
        {
            WriteValueOrObject(typeof(T), obj); // throws exceptions (see WriteValueOrObject(Type type, object obj))
        }

        public void WriteType(Type type)
        {
            //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null);
            WriteString8(GetShortTypeName(type.AssemblyQualifiedName));
        }

        // *** Data directory ***
        public string DataDir
        {
            get { return m_data_dir; }
            set
            {
                //Utils.ThrowException(!Utils.VerifyPathName(value, /*must_exist=*/true) ? new InvalidArgumentValueException("DataDir") : null);
                m_data_dir = value;
            }
        }

        // *** Access to the associated stream ***
        public void Close()
        {
            m_stream.Close();
        }

        public void Flush()
        {
            m_stream.Flush(); // throws IOException
        }

        public Stream Stream
        {
            get { return m_stream; }
        }
    }
}

#endif
b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_Base/LemmaSharpBase.sln new file mode 100644 index 0000000..97e6ee9 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_Base/LemmaSharpBase.sln @@ -0,0 +1,34 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.25420.1 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharp", "LemmaSharp\LemmaSharp.csproj", "{A39293C1-92D8-47B9-93A4-41F443B4F9E4}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x64.ActiveCfg = Debug|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x64.Build.0 = Debug|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x86.ActiveCfg = Debug|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x86.Build.0 = Debug|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|Any CPU.Build.0 = Release|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x64.ActiveCfg = Release|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x64.Build.0 = Release|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x86.ActiveCfg = Release|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x86.Build.0 = Release|x86 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git 
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.Serialization;
using System.Text;

namespace LemmaSharp
{
    /// <summary>
    /// Holds the training examples (word, lemma, weight, msd) for a lemmatizer
    /// together with the rule list derived from them. Examples are keyed by
    /// signature; duplicates are joined. A sorted list view is built lazily by
    /// FinalizeAdditions().
    /// </summary>
    [Serializable]
    public class ExampleList : ISerializable
    {
        #region Private Variables
        // Shared lemmatizer settings (also passed to the rule list).
        private LemmatizerSettings lsett;
        // Rules derived from the examples; each example references an entry here.
        private RuleList rlRules;
        // Examples keyed by signature; adding a duplicate joins it with the existing one.
        private Dictionary<string, LemmaExample> dictExamples;
        // Sorted snapshot of dictExamples; null until FinalizeAdditions() runs,
        // invalidated (reset to null) on every addition.
        private List<LemmaExample> lstExamples;
        #endregion

        #region Constructor(s)
        public ExampleList(LemmatizerSettings lsett) : base()
        {
            this.lsett = lsett;
            this.dictExamples = new Dictionary<string, LemmaExample>();
            this.lstExamples = null;
            this.rlRules = new RuleList(lsett);
        }
        public ExampleList(StreamReader srIn, string sFormat, LemmatizerSettings lsett) : this(lsett)
        {
            AddMultextFile(srIn, sFormat);
        }
        #endregion

        #region Public Properties & Indexers
        public LemmaExample this[int i]
        {
            get
            {
                if (lstExamples == null)
                    FinalizeAdditions();
                return lstExamples[i];
            }
        }
        public int Count
        {
            get
            {
                if (lstExamples == null)
                    FinalizeAdditions();
                return lstExamples.Count;
            }
        }
        /// <summary>Sum of the weights of all examples.</summary>
        public double WeightSum
        {
            get
            {
                if (lstExamples == null)
                    FinalizeAdditions();

                double dWeight = 0;
                foreach (LemmaExample exm in lstExamples)
                    dWeight += exm.Weight;
                return dWeight;
            }
        }
        public RuleList Rules
        {
            get
            {
                return rlRules;
            }
        }
        /// <summary>Sorted list of all examples (finalizes additions on demand).</summary>
        public List<LemmaExample> ListExamples
        {
            get
            {
                if (lstExamples == null)
                    FinalizeAdditions();
                return lstExamples;
            }
        }
        #endregion

        #region Essential Class Functions (adding/removing examples)
        /// <summary>
        /// Parses a tab-separated Multext file. sFormat gives the column layout
        /// by character position: W = word, L = lemma, M = msd, F = frequency/weight.
        /// Aborts after 50 malformed lines.
        /// </summary>
        public void AddMultextFile(StreamReader srIn, string sFormat)
        {
            //read from file
            string sLine = null;
            int iError = 0;
            int iLine = 0;
            var iW = sFormat.IndexOf('W');
            var iL = sFormat.IndexOf('L');
            var iM = sFormat.IndexOf('M');
            var iF = sFormat.IndexOf('F');
            var iLen = Math.Max(Math.Max(iW, iL), Math.Max(iM, iF)) + 1;

            if (iW < 0 || iL < 0)
            {
                throw new Exception("Can not find word and lemma location in the format specification");
            }
            while ((sLine = srIn.ReadLine()) != null && iError < 50)
            {
                iLine++;
                string[] asWords = sLine.Split(new char[] { '\t' });
                if (asWords.Length < iLen)
                {
                    //Console.WriteLine("ERROR: Line doesn't confirm to the given format \"" + sFormat + "\"! Line " + iLine.ToString() + ".");
                    iError++;
                    continue;
                }
                var sWord = asWords[iW];
                var sLemma = asWords[iL];
                // "=" in the lemma column means the word is its own lemma.
                if (sLemma.Equals("=", StringComparison.Ordinal))
                    sLemma = sWord;
                string sMsd = null;
                if (iM > -1)
                    sMsd = asWords[iM];
                double dWeight = 1;
                if (iF > -1)
                    // BUGFIX: the weight lives in the 'F' column; it was read
                    // from asWords[iM] (the msd column) by mistake.
                    Double.TryParse(asWords[iF], out dWeight);
                AddExample(sWord, sLemma, dWeight, sMsd);
            }
            if (iError == 50)
                throw new Exception("Parsing stopped because of too many (50) errors. Check format specification");
        }

        /// <summary>Adds one example; the msd is dropped when settings say to ignore it.</summary>
        public LemmaExample AddExample(string sWord, string sLemma, double dWeight, string sMsd)
        {
            string sNewMsd = lsett.eMsdConsider != LemmatizerSettings.MsdConsideration.Ignore
                ? sMsd
                : null;
            var leNew = new LemmaExample(sWord, sLemma, dWeight, sNewMsd, rlRules, lsett);
            return Add(leNew);
        }

        // Inserts the example or joins it with an existing one of the same
        // signature; invalidates the sorted list view.
        private LemmaExample Add(LemmaExample leNew)
        {
            LemmaExample leReturn = null;
            if (!dictExamples.TryGetValue(leNew.Signature, out leReturn))
            {
                leReturn = leNew;
                dictExamples.Add(leReturn.Signature, leReturn);
            }
            else
                leReturn.Join(leNew);
            lstExamples = null;
            return leReturn;
        }
        public void DropExamples()
        {
            dictExamples.Clear();
            lstExamples = null;
        }
        /// <summary>Builds the sorted list view of the examples (idempotent).</summary>
        public void FinalizeAdditions()
        {
            if (lstExamples != null)
                return;
            lstExamples = new List<LemmaExample>(dictExamples.Values);
            lstExamples.Sort();
        }
        /// <summary>
        /// Builds a new example list containing the front (or rear) parts of all
        /// examples, for the front-lemmatizer variant.
        /// </summary>
        public ExampleList GetFrontRearExampleList(bool front)
        {
            var elExamplesNew = new ExampleList(lsett);
            foreach (var le in this.ListExamples)
            {
                if (front)
                    elExamplesNew.AddExample(le.WordFront, le.LemmaFront, le.Weight, le.Msd);
                else
                    elExamplesNew.AddExample(le.WordRear, le.LemmaRear, le.Weight, le.Msd);
            }
            elExamplesNew.FinalizeAdditions();
            return elExamplesNew;
        }
        #endregion

        #region Output Functions (ToString)
        public override string ToString()
        {
            var sb = new StringBuilder();
            // BUGFIX: use ListExamples (which finalizes on demand) instead of
            // lstExamples, which is null until FinalizeAdditions() is called.
            foreach (var exm in ListExamples)
            {
                sb.AppendLine(exm.ToString());
            }
            return sb.ToString();
        }
        #endregion

        #region Serialization Functions (.Net Default - ISerializable)
        public void GetObjectData(SerializationInfo info, StreamingContext context)
        {
            info.AddValue("lsett", lsett);
            info.AddValue("iNumExamples", dictExamples.Count);
            var aWords = new string[dictExamples.Count];
            var aLemmas = new string[dictExamples.Count];
            var aWeights = new double[dictExamples.Count];
            var aMsds = new string[dictExamples.Count];
            int iExm = 0;
            foreach (var exm in dictExamples.Values)
            {
                aWords[iExm] = exm.Word;
                aLemmas[iExm] = exm.Lemma;
                aWeights[iExm] = exm.Weight;
                aMsds[iExm] = exm.Msd;
                iExm++;
            }
            info.AddValue("aWords", aWords);
            info.AddValue("aLemmas", aLemmas);
            info.AddValue("aWeights", aWeights);
            info.AddValue("aMsds", aMsds);
        }
        public ExampleList(SerializationInfo info, StreamingContext context)
        {
            lsett = (LemmatizerSettings)info.GetValue("lsett", typeof(LemmatizerSettings));
            this.dictExamples = new Dictionary<string, LemmaExample>();
            this.lstExamples = null;
            this.rlRules = new RuleList(lsett);
            var aWords = (string[])info.GetValue("aWords", typeof(string[]));
            var aLemmas = (string[])info.GetValue("aLemmas", typeof(string[]));
            var aWeights = (double[])info.GetValue("aWeights", typeof(double[]));
            var aMsds = (string[])info.GetValue("aMsds", typeof(string[]));
            for (int iExm = 0; iExm < aWords.Length; iExm++)
                AddExample(aWords[iExm], aLemmas[iExm], aWeights[iExm], aMsds[iExm]);
        }
        #endregion

        #region Serialization Functions (Binary)
        public void Serialize(BinaryWriter binWrt, bool bSerializeExamples, bool bThisTopObject)
        {
            //save metadata
            binWrt.Write(bThisTopObject);

            //save reference types if needed -------------------------
            if (bThisTopObject)
                lsett.Serialize(binWrt);

            rlRules.Serialize(binWrt, false);

            if (!bSerializeExamples)
            {
                binWrt.Write(false); // lstExamples == null
                binWrt.Write(0);     // dictExamples.Count == 0
            }
            else
            {
                if (lstExamples == null)
                {
                    binWrt.Write(false); // lstExamples == null
                    //save dictionary items
                    int iCount = dictExamples.Count;
                    binWrt.Write(iCount);
                    foreach (var kvp in dictExamples)
                    {
                        binWrt.Write(kvp.Value.Rule.Signature);
                        kvp.Value.Serialize(binWrt, false);
                    }
                }
                else
                {
                    binWrt.Write(true); // lstExamples != null
                    //save list & dictionary items
                    var iCount = lstExamples.Count;
                    binWrt.Write(iCount);
                    foreach (var le in lstExamples)
                    {
                        binWrt.Write(le.Rule.Signature);
                        le.Serialize(binWrt, false);
                    }
                }
            }
        }
        public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett)
        {
            //load metadata
            var bThisTopObject = binRead.ReadBoolean();
            //load reference types if needed -------------------------
            if (bThisTopObject)
                this.lsett = new LemmatizerSettings(binRead);
            else
                this.lsett = lsett;

            rlRules = new RuleList(binRead, this.lsett);
            var bCreateLstExamples = binRead.ReadBoolean();
            lstExamples = bCreateLstExamples ? new List<LemmaExample>() : null;
            dictExamples = new Dictionary<string, LemmaExample>();

            //load dictionary items; each example re-links to its rule by signature
            var iCount = binRead.ReadInt32();
            for (var iId = 0; iId < iCount; iId++)
            {
                var lrRule = rlRules[binRead.ReadString()];
                var le = new LemmaExample(binRead, this.lsett, lrRule);
                dictExamples.Add(le.Signature, le);
                if (bCreateLstExamples)
                    lstExamples.Add(le);
            }
        }
        public ExampleList(BinaryReader binRead, LemmatizerSettings lsett)
        {
            Deserialize(binRead, lsett);
        }
        #endregion

        #region Serialization Functions (Latino)
#if LATINO
        public void Save(Latino.BinarySerializer binWrt, bool bSerializeExamples, bool bThisTopObject) {
            //save metadata
            binWrt.WriteBool(bThisTopObject);

            //save reference types if needed -------------------------
            if (bThisTopObject)
                lsett.Save(binWrt);

            rlRules.Save(binWrt, false);

            if (!bSerializeExamples) {
                binWrt.WriteBool(false); // lstExamples == null
                binWrt.WriteInt(0);      // dictExamples.Count == 0
            }
            else {
                if (lstExamples == null) {
                    binWrt.WriteBool(false); // lstExamples == null

                    //save dictionary items
                    int iCount = dictExamples.Count;
                    binWrt.WriteInt(iCount);

                    foreach (KeyValuePair<string, LemmaExample> kvp in dictExamples) {
                        binWrt.WriteString(kvp.Value.Rule.Signature);
                        kvp.Value.Save(binWrt, false);
                    }
                }
                else {
                    binWrt.WriteBool(true); // lstExamples != null

                    //save list & dictionary items
                    int iCount = lstExamples.Count;
                    binWrt.WriteInt(iCount);

                    foreach (LemmaExample le in lstExamples) {
                        binWrt.WriteString(le.Rule.Signature);
                        le.Save(binWrt, false);
                    }
                }
            }

        }
        public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
            //load metadata
            bool bThisTopObject = binRead.ReadBool();

            //load reference types if needed -------------------------
            if (bThisTopObject)
                this.lsett = new LemmatizerSettings(binRead);
            else
                this.lsett = lsett;

            rlRules = new RuleList(binRead, this.lsett);

            bool bCreateLstExamples = binRead.ReadBool();

            lstExamples = bCreateLstExamples ? new List<LemmaExample>() : null;
            dictExamples = new Dictionary<string, LemmaExample>();

            //load dictionary items; each example re-links to its rule by signature
            int iCount = binRead.ReadInt();
            for (int iId = 0; iId < iCount; iId++) {
                LemmaRule lrRule = rlRules[binRead.ReadString()];
                LemmaExample le = new LemmaExample(binRead, this.lsett, lrRule);

                dictExamples.Add(le.Signature, le);
                if (bCreateLstExamples) lstExamples.Add(le);
            }

        }
        public ExampleList(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
            Load(binRead, lsett);
        }

#endif
        #endregion
    }
}
switch (lsett.eMsdConsider) + { + case LemmatizerSettings.MsdConsideration.Ignore: + case LemmatizerSettings.MsdConsideration.JoinAll: + case LemmatizerSettings.MsdConsideration.JoinDistinct: + case LemmatizerSettings.MsdConsideration.JoinSameSubstring: + sSignature = string.Format("[{0}]==>[{1}]", sWord, sLemma); + break; + case LemmatizerSettings.MsdConsideration.Distinct: + default: + sSignature = string.Format("[{0}]==>[{1}]({2})", sWord, sLemma, sMsd ?? ""); + break; + } + this.sWordRearCache = null; + this.sWordFrontCache = null; + this.sLemmaFrontCache = null; + } + #endregion + + #region Public Properties + public string Word + { + get + { + return sWord; + } + } + public string Lemma + { + get + { + return sLemma; + } + } + public string Msd + { + get + { + return sMsd; + } + } + public string Signature + { + get + { + return sSignature; + } + } + public double Weight + { + get + { + return dWeight; + } + } + public LemmaRule Rule + { + get + { + return lrRule; + } + } + /// + /// Word to be pre-lemmatized with Front-Lemmatizer into LemmaFront which is then lemmatized by standard Rear-Lemmatizer (Warning it is reversed) + /// + public string WordFront + { + get + { + if (sWordFrontCache == null) + sWordFrontCache = StringReverse(sWord); + return sWordFrontCache; + } + } + /// + /// Lemma to be produced by pre-lemmatizing with Front-Lemmatizer (Warning it is reversed) + /// + public string LemmaFront + { + get + { + if (sLemmaFrontCache == null) + sLemmaFrontCache = StringReverse(WordRear); + return sLemmaFrontCache; + } + } + /// + /// word to be lemmatized by standard Rear-Lemmatizer (it's beggining has been already modified by Front-Lemmatizer) + /// + public string WordRear + { + get + { + if (sWordRearCache == null) + { + int lemmaPos = 0, wordPos = 0; + var common = LongestCommonSubstring(sWord, sLemma, ref wordPos, ref lemmaPos); + sWordRearCache = lemmaPos == -1 ? 
sLemma : (sLemma.Substring(0, lemmaPos + common.Length) + sWord.Substring(wordPos + common.Length)); + } + return sWordRearCache; + } + } + /// + /// lemma to be produced by standard Rear-Lemmatizer from WordRear + /// + public string LemmaRear + { + get + { + return sLemma; + } + } + #endregion + + #region Essential Class Functions (joining two examples into one) + //TODO - this function is not totaly ok because sMsd should not be + //changed since it could be included in signature + public void Join(LemmaExample leJoin) + { + dWeight += leJoin.dWeight; + if (sMsd != null) + switch (lsett.eMsdConsider) + { + case LemmatizerSettings.MsdConsideration.Ignore: + sMsd = null; + break; + case LemmatizerSettings.MsdConsideration.Distinct: + break; + case LemmatizerSettings.MsdConsideration.JoinAll: + sMsd += "|" + leJoin.sMsd; + break; + case LemmatizerSettings.MsdConsideration.JoinDistinct: + var append = string.Format("|{0}", leJoin.sMsd); + if (false == sMsd.Equals(leJoin.sMsd, StringComparison.Ordinal) && + sMsd.IndexOf(append) < 0) + { + sMsd += append; + } + break; + case LemmatizerSettings.MsdConsideration.JoinSameSubstring: + int iPos = 0; + var iMax = Math.Min(sMsd.Length, leJoin.sMsd.Length); + while (iPos < iMax && sMsd[iPos] == leJoin.sMsd[iPos]) + iPos++; + sMsd = sMsd.Substring(0, iPos); + break; + default: + break; + } + + } + #endregion + + #region Essential Class Functions (calculating similarities betwen examples) + public int Similarity(LemmaExample le) + { + return Similarity(this, le); + } + public static int Similarity(LemmaExample le1, LemmaExample le2) + { + var sWord1 = le1.sWord; + var sWord2 = le2.sWord; + var iLen1 = sWord1.Length; + var iLen2 = sWord2.Length; + var iMaxLen = Math.Min(iLen1, iLen2); + for (var iPos = 1; iPos <= iMaxLen; iPos++) + { + if (sWord1[iLen1 - iPos] != sWord2[iLen2 - iPos]) + return iPos - 1; + } + //TODO similarity should be bigger if two words are totaly equal + //if (sWord1 == sWord2) + // return iMaxLen + 1; + 
//else + return iMaxLen; + } + #endregion + + #region Essential Class Functions (comparing examples - eg.: for sorting) + /// + /// Function used to comprare current MultextExample (ME) against argument ME. + /// Mainly used in for sorting lists of MEs. + /// + /// MultextExample (ME) that we compare current ME against. + /// 1 if current ME is bigger, -1 if smaler and 0 if both are the same. + public int CompareTo(LemmaExample other) + { + var iComparison = CompareStrings(this.sWord, other.sWord, false); + if (iComparison != 0) + return iComparison; + + iComparison = CompareStrings(this.sLemma, other.sLemma, true); + if (iComparison != 0) + return iComparison; + + if (lsett.eMsdConsider == LemmatizerSettings.MsdConsideration.Distinct && + this.sMsd != null && other.sMsd != null) + { + iComparison = CompareStrings(this.sMsd, other.sMsd, true); + if (iComparison != 0) + return iComparison; + } + return 0; + } + + public int Compare(LemmaExample x, LemmaExample y) + { + return x.CompareTo(y); + } + + public static int CompareStrings(string sStr1, string sStr2, bool bForward) + { + var iLen1 = sStr1.Length; + var iLen2 = sStr2.Length; + var iMaxLen = Math.Min(iLen1, iLen2); + if (bForward) + { + for (int iPos = 0; iPos < iMaxLen; iPos++) + { + if (sStr1[iPos] > sStr2[iPos]) + return 1; + if (sStr1[iPos] < sStr2[iPos]) + return -1; + } + } + else + { + for (int iPos = 1; iPos <= iMaxLen; iPos++) + { + if (sStr1[iLen1 - iPos] > sStr2[iLen2 - iPos]) + return 1; + if (sStr1[iLen1 - iPos] < sStr2[iLen2 - iPos]) + return -1; + } + } + if (iLen1 > iLen2) + return 1; + if (iLen1 < iLen2) + return -1; + return 0; + } + + public static int EqualPrifixLen(string sStr1, string sStr2) + { + var iLen1 = sStr1.Length; + var iLen2 = sStr2.Length; + var iMaxLen = Math.Min(iLen1, iLen2); + + for (var iPos = 0; iPos < iMaxLen; iPos++) + { + if (sStr1[iPos] != sStr2[iPos]) + return iPos; + } + return iMaxLen; + } + + public static string LongestCommonSubstring(string sStr1, string sStr2, 
ref int iPosInStr1, ref int iPosInStr2) + { + var l = new int[sStr1.Length + 1, sStr2.Length + 1]; + int z = 0; + string ret = ""; + iPosInStr1 = -1; + iPosInStr2 = -1; + for (var i = 0; i < sStr1.Length; i++) + { + for (var j = 0; j < sStr2.Length; j++) + { + if (sStr1[i] == sStr2[j]) + { + if (i == 0 || j == 0) + { + l[i, j] = 1; + } + else + { + l[i, j] = l[i - 1, j - 1] + 1; + } + if (l[i, j] > z) + { + z = l[i, j]; + iPosInStr1 = i - z + 1; + iPosInStr2 = j - z + 1; + ret = sStr1.Substring(i - z + 1, z); + } + } + } + } + return ret; + } + + public static string StringReverse(string s) + { + if (s == null) + return null; + var charArray = new char[s.Length]; + var len = s.Length >> 1; + for (var i = 0; i < len; i++, len--) + { + charArray[i] = s[len]; + charArray[len] = s[i]; + } + return new string(charArray); + } + #endregion + + #region Output Functions (ToString) + public override string ToString() + { + var sb = new StringBuilder(); + if (sWord != null) + sb.AppendFormat("W:\"{0}\" ", sWord); + if (sLemma != null) + sb.AppendFormat("L:\"{0}\" ", sLemma); + if (sMsd != null) + sb.AppendFormat("M:\"{0}\" ", sMsd); + if (false == Double.IsNaN(dWeight)) + sb.AppendFormat("F:\"{0}\" ", dWeight); + if (lrRule != null) + sb.AppendFormat("R:{0} ", lrRule); + if (sb.Length > 0) + return sb.ToString(0, sb.Length - 1); + return string.Empty; + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter binWrt, bool bThisTopObject) + { + //save metadata + binWrt.Write(bThisTopObject); + + //save value types -------------------------------------- + binWrt.Write(sWord); + binWrt.Write(sLemma); + binWrt.Write(sSignature); + if (sMsd == null) + { + binWrt.Write(false); + } + else + { + binWrt.Write(true); + binWrt.Write(sMsd); + } + binWrt.Write(dWeight); + //save refernce types if needed ------------------------- + if (bThisTopObject) + { + lsett.Serialize(binWrt); + lrRule.Serialize(binWrt, false); + } + } + public void 
Deserialize(BinaryReader binRead, LemmatizerSettings lsett, LemmaRule lrRule) + { + //load metadata + var bThisTopObject = binRead.ReadBoolean(); + + //load value types -------------------------------------- + sWord = binRead.ReadString(); + sLemma = binRead.ReadString(); + sSignature = binRead.ReadString(); + if (binRead.ReadBoolean()) + sMsd = binRead.ReadString(); + else + sMsd = null; + dWeight = binRead.ReadDouble(); + + //load refernce types if needed ------------------------- + if (bThisTopObject) + { + this.lsett = new LemmatizerSettings(binRead); + this.lrRule = new LemmaRule(binRead, this.lsett); + } + else + { + this.lsett = lsett; + this.lrRule = lrRule; + } + this.sWordRearCache = null; + this.sWordFrontCache = null; + this.sLemmaFrontCache = null; + } + + public LemmaExample(BinaryReader binRead, LemmatizerSettings lsett, LemmaRule lrRule) + { + Deserialize(binRead, lsett, lrRule); + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + + public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject) { + //save metadata + binWrt.WriteBool(bThisTopObject); + + //save value types -------------------------------------- + binWrt.WriteString(sWord); + binWrt.WriteString(sLemma); + binWrt.WriteString(sSignature); + if (sMsd == null) + binWrt.WriteBool(false); + else { + binWrt.WriteBool(true); + binWrt.WriteString(sMsd); + } + binWrt.WriteDouble(dWeight); + + //save refernce types if needed ------------------------- + if (bThisTopObject) { + lsett.Save(binWrt); + lrRule.Save(binWrt, false); + } + } + public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett, LemmaRule lrRule) { + //load metadata + bool bThisTopObject = binRead.ReadBool(); + + //load value types -------------------------------------- + sWord = binRead.ReadString(); + sLemma = binRead.ReadString(); + sSignature = binRead.ReadString(); + if (binRead.ReadBool()) + sMsd = binRead.ReadString(); + else + sMsd = null; + dWeight = binRead.ReadDouble(); + + 
//load refernce types if needed ------------------------- + if (bThisTopObject) { + this.lsett = new LemmatizerSettings(binRead); + this.lrRule = new LemmaRule(binRead, this.lsett); + } + else { + this.lsett = lsett; + this.lrRule = lrRule; + } + + } + public LemmaExample(Latino.BinarySerializer binRead, LemmatizerSettings lsett, LemmaRule lrRule) { + Load(binRead, lsett, lrRule); + } + +#endif + #endregion + } +} + + diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/LemmaRule.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/LemmaRule.cs new file mode 100644 index 0000000..722ea85 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/LemmaRule.cs @@ -0,0 +1,189 @@ +using System; +using System.IO; + +namespace LemmaSharp +{ + public class LemmaRule + { + #region Private Variables + private int iId; + private int iFrom; + private string sFrom; + private string sTo; + private string sSignature; + private LemmatizerSettings lsett; + #endregion + + #region Constructor(s) + public LemmaRule(string sWord, string sLemma, int iId, LemmatizerSettings lsett) + { + this.lsett = lsett; + this.iId = iId; + + int iSameStem = SameStem(sWord, sLemma); + sTo = sLemma.Substring(iSameStem); + iFrom = sWord.Length - iSameStem; + + if (lsett.bUseFromInRules) + { + sFrom = sWord.Substring(iSameStem); + sSignature = string.Format("[{0}]==>[{1}]", sFrom, sTo); + } + else + { + sFrom = null; + sSignature = string.Format("[#{0}]==>[{1}]", iFrom, sTo); + } + } + #endregion + + #region Public Properties + public string Signature + { + get + { + return sSignature; + } + } + public int Id + { + get + { + return iId; + } + } + #endregion + + #region Essential Class Functions + private static int SameStem(string sStr1, string sStr2) + { + var iLen1 = sStr1.Length; + var iLen2 = sStr2.Length; + var iMaxLen = Math.Min(iLen1, iLen2); + for (var iPos = 0; iPos < 
iMaxLen; iPos++) + { + if (sStr1[iPos] != sStr2[iPos]) + return iPos; + } + return iMaxLen; + } + public bool IsApplicableToGroup(int iGroupCondLen) + { + return iGroupCondLen >= iFrom; + } + public string Lemmatize(string sWord) + { + return sWord.Substring(0, sWord.Length - iFrom) + sTo; + } + #endregion + + #region Output Functions (ToString) + public override string ToString() + { + return string.Format("{0}:{1}", iId, sSignature); + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter binWrt, bool bThisTopObject) + { + //save metadata + binWrt.Write(bThisTopObject); + + //save value types -------------------------------------- + binWrt.Write(iId); + binWrt.Write(iFrom); + if (sFrom == null) + binWrt.Write(false); + else + { + binWrt.Write(true); + binWrt.Write(sFrom); + } + binWrt.Write(sTo); + binWrt.Write(sSignature); + + if (bThisTopObject) + lsett.Serialize(binWrt); + } + public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett) + { + //load metadata + var bThisTopObject = binRead.ReadBoolean(); + + //load value types -------------------------------------- + iId = binRead.ReadInt32(); + iFrom = binRead.ReadInt32(); + if (binRead.ReadBoolean()) + { + sFrom = binRead.ReadString(); + } + else + { + sFrom = null; + } + sTo = binRead.ReadString(); + sSignature = binRead.ReadString(); + //load refernce types if needed ------------------------- + if (bThisTopObject) + this.lsett = new LemmatizerSettings(binRead); + else + this.lsett = lsett; + } + + public LemmaRule(System.IO.BinaryReader binRead, LemmatizerSettings lsett) + { + this.Deserialize(binRead, lsett); + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + + public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject) { + //save metadata + binWrt.WriteBool(bThisTopObject); + + //save value types -------------------------------------- + binWrt.WriteInt(iId); + binWrt.WriteInt(iFrom); + if (sFrom == null) + 
binWrt.WriteBool(false); + else { + binWrt.WriteBool(true); + binWrt.WriteString(sFrom); + } + binWrt.WriteString(sTo); + binWrt.WriteString(sSignature); + + if (bThisTopObject) + lsett.Save(binWrt); + } + public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett) { + //load metadata + bool bThisTopObject = binRead.ReadBool(); + + //load value types -------------------------------------- + iId = binRead.ReadInt(); + iFrom = binRead.ReadInt(); + if (binRead.ReadBool()) + sFrom = binRead.ReadString(); + else + sFrom = null; + sTo = binRead.ReadString(); + sSignature = binRead.ReadString(); + + //load refernce types if needed ------------------------- + if (bThisTopObject) + this.lsett = new LemmatizerSettings(binRead); + else + this.lsett = lsett; + } + public LemmaRule(Latino.BinarySerializer binRead, LemmatizerSettings lsett) { + Load(binRead, lsett); + } + +#endif + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/LemmaTreeNode.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/LemmaTreeNode.cs new file mode 100644 index 0000000..7991548 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/LemmaTreeNode.cs @@ -0,0 +1,478 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace LemmaSharp +{ + [Serializable] + public class LemmaTreeNode : ILemmatizerModel + { + #region Private Variables + //settings + private LemmatizerSettings lsett; + //tree structure references + private Dictionary dictSubNodes; + private LemmaTreeNode ltnParentNode; + //essential node properties + private int iSimilarity; //similarity among all words in this node + private string sCondition; //suffix that must match in order to lemmatize + private bool bWholeWord; //true if condition has to match to whole word + //rules and weights; + private LemmaRule lrBestRule; //the best 
rule to be applied when lemmatizing + private RuleWeighted[] aBestRules; //list of best rules + private double dWeight; + //source of this node + private int iStart; + private int iEnd; + private ExampleList elExamples; + #endregion + + #region Constructor(s) & Destructor(s) + private LemmaTreeNode(LemmatizerSettings lsett) + { + this.lsett = lsett; + } + public LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples) + : this(lsett, elExamples, 0, elExamples.Count - 1, null) + { + } + /// + /// + /// + /// + /// + /// Index of the first word of the current group + /// Index of the last word of the current group + /// + private LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples, int iStart, int iEnd, LemmaTreeNode ltnParentNode) : this(lsett) + { + this.ltnParentNode = ltnParentNode; + this.dictSubNodes = null; + this.iStart = iStart; + this.iEnd = iEnd; + this.elExamples = elExamples; + if (iStart >= elExamples.Count || iEnd >= elExamples.Count || iStart > iEnd) + { + lrBestRule = elExamples.Rules.DefaultRule; + aBestRules = new RuleWeighted[1]; + aBestRules[0] = new RuleWeighted(lrBestRule, 0); + dWeight = 0; + return; + } + int iConditionLength = Math.Min(ltnParentNode == null ? 0 : ltnParentNode.iSimilarity + 1, elExamples[iStart].Word.Length); + this.sCondition = elExamples[iStart].Word.Substring(elExamples[iStart].Word.Length - iConditionLength); + this.iSimilarity = elExamples[iStart].Similarity(elExamples[iEnd]); + this.bWholeWord = ltnParentNode == null ? 
false : elExamples[iEnd].Word.Length == ltnParentNode.iSimilarity; + FindBestRules(); + AddSubAll(); + //TODO check this heuristics, can be problematic when there are more applicable rules + if (dictSubNodes != null) + { + var lReplaceNodes = new List>(); + foreach (var kvpChild in dictSubNodes) + if (kvpChild.Value.dictSubNodes != null && kvpChild.Value.dictSubNodes.Count == 1) + { + var enumChildChild = kvpChild.Value.dictSubNodes.Values.GetEnumerator(); + enumChildChild.MoveNext(); + var ltrChildChild = enumChildChild.Current; + if (kvpChild.Value.lrBestRule == lrBestRule) + lReplaceNodes.Add(new KeyValuePair(kvpChild.Key, ltrChildChild)); + } + foreach (var kvpChild in lReplaceNodes) + { + dictSubNodes[kvpChild.Key] = kvpChild.Value; + kvpChild.Value.ltnParentNode = this; + } + } + } + #endregion + + #region Public Properties + public int TreeSize + { + get + { + int iCount = 1; + if (dictSubNodes != null) + { + foreach (var ltnChild in dictSubNodes.Values) + { + iCount += ltnChild.TreeSize; + } + } + return iCount; + } + } + public double Weight + { + get + { + return dWeight; + } + } + + #endregion + + #region Essential Class Functions (building model) + private void FindBestRules() + { + /* + * LINQ SPEED TEST (Slower than current metodology) + * + List leApplicable = new List(); + for (int iExm = iStart; iExm <= iEnd; iExm++) + if (elExamples[iExm].Rule.IsApplicableToGroup(sCondition.Length)) + leApplicable.Add(elExamples[iExm]); + + List> lBestRules = new List>(); + lBestRules.AddRange( + leApplicable. + GroupBy>( + le => le.Rule, + le => le.Weight, + (lr, enumDbl) => new KeyValuePair(lr, enumDbl.Aggregate((acc, curr) => acc + curr)) + ). 
+ OrderBy(kvpLrWght=>kvpLrWght.Value) + ); + + if (lBestRules.Count > 0) + lrBestRule = lBestRules[0].Key; + else { + lrBestRule = elExamples.Rules.DefaultRule; + + } + */ + dWeight = 0; + //calculate dWeight of whole node and calculates qualities for all rules + var dictApplicableRules = new Dictionary(); + //dictApplicableRules.Add(elExamples.Rules.DefaultRule, 0); + while (dictApplicableRules.Count == 0) + { + for (var iExm = iStart; iExm <= iEnd; iExm++) + { + var lr = elExamples[iExm].Rule; + var dExmWeight = elExamples[iExm].Weight; + dWeight += dExmWeight; + if (lr.IsApplicableToGroup(sCondition.Length)) + { + if (dictApplicableRules.ContainsKey(lr)) + dictApplicableRules[lr] += dExmWeight; + else + dictApplicableRules.Add(lr, dExmWeight); + } + } + //if none found then increase condition length or add some default appliable rule + if (dictApplicableRules.Count == 0) + { + if (this.sCondition.Length < iSimilarity) + this.sCondition = elExamples[iStart].Word.Substring(elExamples[iStart].Word.Length - (sCondition.Length + 1)); + else + //TODO preveri hevristiko, mogoce je bolje ce se doda default rule namesto rulea od starsa + dictApplicableRules.Add(ltnParentNode.lrBestRule, 0); + } + } + //TODO can optimize this step using sorted list (dont add if it's worse than the worst) + var lSortedRules = new List(); + foreach (var kvp in dictApplicableRules) + { + lSortedRules.Add(new RuleWeighted(kvp.Key, kvp.Value / dWeight)); + } + lSortedRules.Sort(); + + //keep just best iMaxRulesPerNode rules + var iNumRules = lSortedRules.Count; + if (lsett.iMaxRulesPerNode > 0) + iNumRules = Math.Min(lSortedRules.Count, lsett.iMaxRulesPerNode); + + aBestRules = new RuleWeighted[iNumRules]; + for (var iRule = 0; iRule < iNumRules; iRule++) + { + aBestRules[iRule] = lSortedRules[iRule]; + } + + //set best rule + lrBestRule = aBestRules[0].Rule; + + //TODO must check if this hevristics is OK (to privilige parent rule) + if (ltnParentNode != null) + { + for (int iRule = 0; iRule < 
lSortedRules.Count && + lSortedRules[iRule].Weight == lSortedRules[0].Weight; iRule++) + { + if (lSortedRules[iRule].Rule == ltnParentNode.lrBestRule) + { + lrBestRule = lSortedRules[iRule].Rule; + break; + } + } + } + } + + private void AddSubAll() + { + int iStartGroup = iStart; + var chCharPrev = '\0'; + var bSubGroupNeeded = false; + for (var iWrd = iStart; iWrd <= iEnd; iWrd++) + { + var sWord = elExamples[iWrd].Word; + var chCharThis = sWord.Length > iSimilarity ? sWord[sWord.Length - 1 - iSimilarity] : '\0'; + if (iWrd != iStart && chCharPrev != chCharThis) + { + if (bSubGroupNeeded) + { + AddSub(iStartGroup, iWrd - 1, chCharPrev); + bSubGroupNeeded = false; + } + iStartGroup = iWrd; + } + + //TODO check out bSubGroupNeeded when there are multiple posible rules (not just lrBestRule) + if (elExamples[iWrd].Rule != lrBestRule) + { + bSubGroupNeeded = true; + } + chCharPrev = chCharThis; + } + if (bSubGroupNeeded && iStartGroup != iStart) + { + AddSub(iStartGroup, iEnd, chCharPrev); + } + } + + private void AddSub(int iStart, int iEnd, char chChar) + { + var ltnSub = new LemmaTreeNode(lsett, elExamples, iStart, iEnd, this); + + //TODO - maybe not realy appropriate because loosing statisitcs from multiple possible rules + if (ltnSub.lrBestRule == lrBestRule && ltnSub.dictSubNodes == null) + return; + + if (dictSubNodes == null) + dictSubNodes = new Dictionary(); + dictSubNodes.Add(chChar, ltnSub); + } + #endregion + + #region Essential Class Functions (running model = lemmatizing) + public bool ConditionSatisfied(string sWord) + { + //if (bWholeWord) + // return sWord == sCondition; + //else + // return sWord.EndsWith(sCondition); + + var iDiff = sWord.Length - sCondition.Length; + if (iDiff < 0 || (bWholeWord && iDiff > 0)) + return false; + + var iWrdEnd = sCondition.Length - ltnParentNode.sCondition.Length - 1; + for (var iChar = 0; iChar < iWrdEnd; iChar++) + { + if (sCondition[iChar] != sWord[iChar + iDiff]) + return false; + } + return true; + } + public 
string Lemmatize(string sWord) + { + if (sWord.Length >= iSimilarity && dictSubNodes != null) + { + char chChar = sWord.Length > iSimilarity ? sWord[sWord.Length - 1 - iSimilarity] : '\0'; + if (dictSubNodes.ContainsKey(chChar) && dictSubNodes[chChar].ConditionSatisfied(sWord)) + return dictSubNodes[chChar].Lemmatize(sWord); + } + return lrBestRule.Lemmatize(sWord); + } + + #endregion + + #region Output Functions (ToString) + public override string ToString() + { + var sb = new StringBuilder(); + ToString(sb, 0); + return sb.ToString(); + } + + private void ToString(StringBuilder sb, int iLevel) + { + sb.Append(new string('\t', iLevel)); + sb.AppendFormat("Suffix=\"{0}{1}\"; ", bWholeWord ? "^" : string.Empty, sCondition); + sb.AppendFormat("Rule=\"{0}\"; ", lrBestRule); + sb.AppendFormat("Weight=\"{0}\"; ", dWeight); + if (aBestRules != null && aBestRules.Length > 0) + sb.AppendFormat("Cover={0}; ", aBestRules[0].Weight); + sb.Append("Rulles="); + if (aBestRules != null) + { + foreach (var rw in aBestRules) + sb.AppendFormat(" {0}", rw); + } + sb.Append("; "); + sb.AppendLine(); + if (dictSubNodes != null) + { + foreach (var ltnChild in dictSubNodes.Values) + { + ltnChild.ToString(sb, iLevel + 1); + } + } + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter binWrt) + { + binWrt.Write(dictSubNodes != null); + if (dictSubNodes != null) + { + binWrt.Write(dictSubNodes.Count); + foreach (var kvp in dictSubNodes) + { + binWrt.Write(kvp.Key); + kvp.Value.Serialize(binWrt); + } + } + binWrt.Write(iSimilarity); + binWrt.Write(sCondition); + binWrt.Write(bWholeWord); + binWrt.Write(lrBestRule.Signature); + binWrt.Write(aBestRules.Length); + for (var i = 0; i < aBestRules.Length; i++) + { + binWrt.Write(aBestRules[i].Rule.Signature); + binWrt.Write(aBestRules[i].Weight); + } + binWrt.Write(dWeight); + binWrt.Write(iStart); + binWrt.Write(iEnd); + } + + public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, 
ExampleList elExamples, LemmaTreeNode ltnParentNode) + { + this.lsett = lsett; + if (binRead.ReadBoolean()) + { + dictSubNodes = new Dictionary(); + var iCount = binRead.ReadInt32(); + for (var i = 0; i < iCount; i++) + { + var cKey = binRead.ReadChar(); + var ltrSub = new LemmaTreeNode(binRead, this.lsett, elExamples, this); + dictSubNodes.Add(cKey, ltrSub); + } + } + else + { + dictSubNodes = null; + } + this.ltnParentNode = ltnParentNode; + + iSimilarity = binRead.ReadInt32(); + sCondition = binRead.ReadString(); + bWholeWord = binRead.ReadBoolean(); + lrBestRule = elExamples.Rules[binRead.ReadString()]; + + var iCountBest = binRead.ReadInt32(); + aBestRules = new RuleWeighted[iCountBest]; + for (var i = 0; i < iCountBest; i++) + { + aBestRules[i] = + new RuleWeighted(elExamples.Rules[binRead.ReadString()], binRead.ReadDouble()); + } + dWeight = binRead.ReadDouble(); + iStart = binRead.ReadInt32(); + iEnd = binRead.ReadInt32(); + this.elExamples = elExamples; + } + public LemmaTreeNode(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode) + { + Deserialize(binRead, lsett, elExamples, ltnParentNode); + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + public void Save(Latino.BinarySerializer binWrt) { + binWrt.WriteBool(dictSubNodes != null); + if (dictSubNodes != null) { + binWrt.WriteInt(dictSubNodes.Count); + foreach (KeyValuePair kvp in dictSubNodes) { + binWrt.WriteChar(kvp.Key); + kvp.Value.Save(binWrt); + } + } + + binWrt.WriteInt(iSimilarity); + binWrt.WriteString(sCondition); + binWrt.WriteBool(bWholeWord); + + binWrt.WriteString(lrBestRule.Signature); + binWrt.WriteInt(aBestRules.Length); + for (int i = 0; i < aBestRules.Length; i++) { + binWrt.WriteString(aBestRules[i].Rule.Signature); + binWrt.WriteDouble(aBestRules[i].Weight); + } + binWrt.WriteDouble(dWeight); + + binWrt.WriteInt(iStart); + binWrt.WriteInt(iEnd); + } + public void Load(Latino.BinarySerializer binRead, 
LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode) { + this.lsett = lsett; + + if (binRead.ReadBool()) { + dictSubNodes = new Dictionary(); + int iCount = binRead.ReadInt(); + for (int i = 0; i < iCount; i++) { + char cKey = binRead.ReadChar(); + LemmaTreeNode ltrSub = new LemmaTreeNode(binRead, this.lsett, elExamples, this); + dictSubNodes.Add(cKey, ltrSub); + } + } + else + dictSubNodes = null; + + this.ltnParentNode = ltnParentNode; + + iSimilarity = binRead.ReadInt(); + sCondition = binRead.ReadString(); + bWholeWord = binRead.ReadBool(); + + lrBestRule = elExamples.Rules[binRead.ReadString()]; + + int iCountBest = binRead.ReadInt(); + aBestRules = new RuleWeighted[iCountBest]; + for (int i = 0; i < iCountBest; i++) + aBestRules[i] = new RuleWeighted(elExamples.Rules[binRead.ReadString()], binRead.ReadDouble()); + + dWeight = binRead.ReadDouble(); + + iStart = binRead.ReadInt(); + iEnd = binRead.ReadInt(); + this.elExamples = elExamples; + + } + public LemmaTreeNode(Latino.BinarySerializer binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode) { + Load(binRead, lsett, elExamples, ltnParentNode); + } +#endif + #endregion + + #region Other (Temporarly) + //TODO - this is temp function, remove it + public bool CheckConsistency() + { + var bReturn = true; + if (dictSubNodes != null) + foreach (var ltnChild in dictSubNodes.Values) + bReturn = bReturn && + ltnChild.CheckConsistency() && + ltnChild.sCondition.EndsWith(sCondition); + return bReturn; + } + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/Lemmatizer.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/Lemmatizer.cs new file mode 100644 index 0000000..b63632a --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/Lemmatizer.cs @@ -0,0 +1,465 @@ +using System; +using System.Collections.Generic; 
+using System.Text; +using System.IO; +using System.Runtime.Serialization; +using System.IO.Compression; +using SevenZip; + +namespace LemmaSharp +{ + [Serializable] + public class Lemmatizer : ITrainableLemmatizer +#if LATINO + , Latino.ISerializable +#endif + { + + #region Private Variables + protected LemmatizerSettings lsett; + protected ExampleList elExamples; + protected LemmaTreeNode ltnRootNode; + protected LemmaTreeNode ltnRootNodeFront; + #endregion + + #region Constructor(s) + public Lemmatizer() : + this(new LemmatizerSettings()) + { } + + public Lemmatizer(LemmatizerSettings lsett) + { + this.lsett = lsett; + this.elExamples = new ExampleList(lsett); + this.ltnRootNode = null; + this.ltnRootNodeFront = null; + } + + public Lemmatizer(StreamReader srIn, string sFormat, LemmatizerSettings lsett) : this(lsett) + { + AddMultextFile(srIn, sFormat); + } + #endregion + + #region Private Properties + private LemmaTreeNode ltrRootNodeSafe + { + get + { + if (ltnRootNode == null) + BuildModel(); + return ltnRootNode; + } + } + private LemmaTreeNode ltrRootNodeFrontSafe + { + get + { + if (ltnRootNodeFront == null && lsett.bBuildFrontLemmatizer) + BuildModel(); + return ltnRootNodeFront; + } + } + #endregion + + #region Public Properties + public LemmatizerSettings Settings + { + get + { + return lsett.CloneDeep(); + } + } + public ExampleList Examples + { + get + { + return elExamples; + } + } + public RuleList Rules + { + get + { + return elExamples.Rules; + } + } + public LemmaTreeNode RootNode + { + get + { + return ltrRootNodeSafe; + } + } + public LemmaTreeNode RootNodeFront + { + get + { + return ltrRootNodeFrontSafe; + } + } + public ILemmatizerModel Model + { + get + { + return ltrRootNodeSafe; + } + } + #endregion + + #region Essential Class Functions (adding examples to repository) + public void AddMultextFile(StreamReader srIn, string sFormat) + { + this.elExamples.AddMultextFile(srIn, sFormat); + ltnRootNode = null; + } + public void 
AddExample(string sWord, string sLemma) + { + AddExample(sWord, sLemma, 1, null); + } + public void AddExample(string sWord, string sLemma, double dWeight) + { + AddExample(sWord, sLemma, dWeight, null); + } + public void AddExample(string sWord, string sLemma, double dWeight, string sMsd) + { + elExamples.AddExample(sWord, sLemma, dWeight, sMsd); + ltnRootNode = null; + } + public void DropExamples() + { + elExamples.DropExamples(); + } + public void FinalizeAdditions() + { + elExamples.FinalizeAdditions(); + } + #endregion + + #region Essential Class Functions (building model & lemmatizing) + public void BuildModel() + { + if (ltnRootNode != null) + return; + + if (!lsett.bBuildFrontLemmatizer) + { + //TODO remove: elExamples.FinalizeAdditions(); + elExamples.FinalizeAdditions(); + ltnRootNode = new LemmaTreeNode(lsett, elExamples); + } + else + { + ltnRootNode = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(false)); + ltnRootNodeFront = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(true)); + } + } + + public string Lemmatize(string sWord) + { + if (!lsett.bBuildFrontLemmatizer) + { + return ltrRootNodeSafe.Lemmatize(sWord); + } + var sWordFront = LemmaExample.StringReverse(sWord); + var sLemmaFront = ltrRootNodeFrontSafe.Lemmatize(sWordFront); + var sWordRear = LemmaExample.StringReverse(sLemmaFront); + return ltrRootNodeSafe.Lemmatize(sWordRear); + } + #endregion + + #region Serialization Functions (ISerializable) + public void GetObjectData(SerializationInfo info, StreamingContext context) + { + info.AddValue("lsett", lsett); + info.AddValue("elExamples", elExamples); + } + + public Lemmatizer(SerializationInfo info, StreamingContext context) : this() + { + lsett = (LemmatizerSettings)info.GetValue("lsett", typeof(LemmatizerSettings)); + elExamples = (ExampleList)info.GetValue("elExamples", typeof(ExampleList)); + this.BuildModel(); + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter 
binWrt, bool bSerializeExamples) + { + lsett.Serialize(binWrt); + binWrt.Write(bSerializeExamples); + elExamples.Serialize(binWrt, bSerializeExamples, false); + + if (!bSerializeExamples) + { + elExamples.GetFrontRearExampleList(false).Serialize(binWrt, bSerializeExamples, false); + elExamples.GetFrontRearExampleList(true).Serialize(binWrt, bSerializeExamples, false); + } + ltnRootNode.Serialize(binWrt); + if (lsett.bBuildFrontLemmatizer) + ltnRootNodeFront.Serialize(binWrt); + } + + public void Deserialize(BinaryReader binRead) + { + lsett = new LemmatizerSettings(binRead); + + var bSerializeExamples = binRead.ReadBoolean(); + elExamples = new ExampleList(binRead, lsett); + + ExampleList elExamplesRear; + ExampleList elExamplesFront; + + if (bSerializeExamples) + { + elExamplesRear = elExamples.GetFrontRearExampleList(false); + elExamplesFront = elExamples.GetFrontRearExampleList(true); + } + else + { + elExamplesRear = new ExampleList(binRead, lsett); + elExamplesFront = new ExampleList(binRead, lsett); + } + + if (!lsett.bBuildFrontLemmatizer) + { + ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null); + } + else + { + ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamplesRear, null); + ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, elExamplesFront, null); + } + } + + //Do not change the order!!! (If new compression algorithms are added, otherwise you will not be able to load old files.) 
+ public enum Compression + { + None, + Deflate, + LZMA + } + + public Lemmatizer(BinaryReader binRead) + { + var compr = (Compression)binRead.ReadByte(); + if (compr == Compression.None) + Deserialize(binRead); + else + throw new Exception("Loading lemmatizer with binary reader on uncompressed stream is not supported."); + } + + public Lemmatizer(Stream streamIn) + { + Deserialize(streamIn); + } + + public void Serialize(Stream streamOut) + { + Serialize(streamOut, true, Compression.None); + } + public void Serialize(Stream streamOut, bool bSerializeExamples) + { + Serialize(streamOut, bSerializeExamples, Compression.None); + } + public void Serialize(Stream streamOut, bool bSerializeExamples, Compression compress) + { + streamOut.WriteByte((byte)compress); + switch (compress) + { + case Compression.None: + SerializeNone(streamOut, bSerializeExamples); + break; + case Compression.Deflate: + SerializeDeflate(streamOut, bSerializeExamples); + break; + case Compression.LZMA: + SerializeLZMA(streamOut, bSerializeExamples); + break; + default: + break; + } + } + + private void SerializeNone(Stream streamOut, bool bSerializeExamples) + { + using (var binWrt = new BinaryWriter(streamOut)) + { + this.Serialize(binWrt, bSerializeExamples); + } + } + + private void SerializeDeflate(Stream streamOut, bool bSerializeExamples) + { + using (var streamOutNew = new DeflateStream(streamOut, CompressionMode.Compress, true)) + { + using (var binWrt = new BinaryWriter(streamOutNew)) + { + this.Serialize(binWrt, bSerializeExamples); + binWrt.Flush(); + binWrt.Close(); + } + } + } + + private void SerializeLZMA(Stream streamOut, bool bSerializeExamples) + { + CoderPropID[] propIDs = + { + CoderPropID.DictionarySize, + CoderPropID.PosStateBits, + CoderPropID.LitContextBits, + CoderPropID.LitPosBits, + CoderPropID.Algorithm, + CoderPropID.NumFastBytes, + CoderPropID.MatchFinder, + CoderPropID.EndMarker + }; + + Int32 dictionary = 1 << 23; + Int32 posStateBits = 2; + Int32 litContextBits 
= 3; // for normal files + Int32 litPosBits = 0; + Int32 algorithm = 2; + Int32 numFastBytes = 128; + var mf = "bt4"; + var eos = false; + + object[] properties = + { + (Int32)(dictionary), + (Int32)(posStateBits), + (Int32)(litContextBits), + (Int32)(litPosBits), + (Int32)(algorithm), + (Int32)(numFastBytes), + mf, + eos + }; + + using (var msTemp = new MemoryStream()) + { + using (var binWrtTemp = new BinaryWriter(msTemp)) + { + this.Serialize(binWrtTemp, bSerializeExamples); + msTemp.Position = 0; + var encoder = new SevenZip.Compression.LZMA.Encoder(); + encoder.SetCoderProperties(propIDs, properties); + encoder.WriteCoderProperties(streamOut); + var fileSize = msTemp.Length; + for (int i = 0; i < 8; i++) + { + streamOut.WriteByte((Byte)(fileSize >> (8 * i))); + } + encoder.Code(msTemp, streamOut, -1, -1, null); + binWrtTemp.Close(); + encoder = null; + } + msTemp.Close(); + } + } + + public void Deserialize(Stream streamIn) + { + var compr = (Compression)streamIn.ReadByte(); + using (var streamInNew = Decompress(streamIn, compr)) + { + using (var br = new BinaryReader(streamInNew)) + { + Deserialize(br); + } + } + } + + private Stream Decompress(Stream streamIn, Compression compress) + { + Stream streamInNew; + switch (compress) + { + case Compression.None: + default: + streamInNew = streamIn; + break; + case Compression.Deflate: + streamInNew = new DeflateStream(streamIn, CompressionMode.Decompress); + break; + case Compression.LZMA: + streamInNew = DecompressLZMA(streamIn); + break; + } + return streamInNew; + } + + private Stream DecompressLZMA(Stream streamIn) + { + var properties = new byte[5]; + if (streamIn.Read(properties, 0, 5) != 5) + throw new Exception("input .lzma is too short"); + var decoder = new SevenZip.Compression.LZMA.Decoder(); + decoder.SetDecoderProperties(properties); + + long outSize = 0; + for (var i = 0; i < 8; i++) + { + var v = streamIn.ReadByte(); + if (v < 0) + throw (new Exception("Can't Read 1")); + outSize |= ((long)(byte)v) 
<< (8 * i); + } + var compressedSize = streamIn.Length - streamIn.Position; + var outStream = new MemoryStream(); + decoder.Code(streamIn, outStream, compressedSize, outSize, null); + outStream.Seek(0, 0); + decoder = null; + return outStream; + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + + public void Save(Latino.BinarySerializer binWrt) { + lsett.Save(binWrt); + + elExamples.Save(binWrt, true, false); + + ltnRootNode.Save(binWrt); + if (lsett.bBuildFrontLemmatizer) + ltnRootNodeFront.Save(binWrt); + } + + public void Load(Latino.BinarySerializer binRead) { + lsett = new LemmatizerSettings(binRead); + elExamples = new ExampleList(binRead, lsett); + if (!lsett.bBuildFrontLemmatizer) { + ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null); + } + else { + ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples.GetFrontRearExampleList(false) , null); + ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, elExamples.GetFrontRearExampleList(true), null); + } + } + + public Lemmatizer(Latino.BinarySerializer binRead) { + Load(binRead); + } + + public void Save(Stream streamOut) { + Latino.BinarySerializer binWrt = new Latino.BinarySerializer(streamOut); + this.Save(binWrt); + binWrt.Close(); + } + public void Load(Stream streamIn) { + Latino.BinarySerializer binRead = new Latino.BinarySerializer(streamIn); + Load(binRead); + binRead.Close(); + } + + public Lemmatizer(Stream streamIn, string sDummy) { + Load(streamIn); + } + +#endif + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/LemmatizerSettings.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/LemmatizerSettings.cs new file mode 100644 index 0000000..04ed7a0 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/LemmatizerSettings.cs @@ -0,0 +1,143 @@ +using System; +using System.IO; +using 
System.Runtime.Serialization; + +namespace LemmaSharp +{ + /// + /// These are the lemmagen algorithm settings that affect speed/power of the learning and lemmatizing algorithm. + /// TODO this class will be probably removed in the future. + /// + [Serializable] + public class LemmatizerSettings : ISerializable + { + #region Constructor(s) + public LemmatizerSettings() + { + } + #endregion + + #region Sub-Structures + /// + /// How algorithm considers msd tags. + /// + public enum MsdConsideration + { + /// + /// Completely ignores msd tags (join examples with different tags and sum their weights). + /// + Ignore, + /// + /// Same examples with different msd's are not considered equal and are not joined. + /// + Distinct, + /// + /// Joins examples with different tags (concatenates all msd tags). + /// + JoinAll, + /// + /// Joins examples with different tags (concatenates just distinct msd tags - somehow slower). + /// + JoinDistinct, + /// + /// Joins examples with different tags (new tag is the left to right substring that all joined examples share). + /// + JoinSameSubstring + } + #endregion + + #region Public Variables + /// + /// True if from string should be included in rule identifier ([from]->[to]). False if just length of from string is used ([#len]->[to]). + /// + public bool bUseFromInRules = true; + /// + /// Specification of how the algorithm considers msd tags. + /// + public MsdConsideration eMsdConsider = MsdConsideration.Distinct; + /// + /// How many of the best rules are kept in memory for each node. Zero means unlimited.
+ /// + public int iMaxRulesPerNode = 0; + /// + /// If true, then the build process uses a few more heuristics to additionally build a left-to-right lemmatizer (lemmatizes front of the word) + /// + public bool bBuildFrontLemmatizer = false; + #endregion + + #region Cloneable functions + public LemmatizerSettings CloneDeep() + { + return new LemmatizerSettings() + { + bUseFromInRules = this.bUseFromInRules, + eMsdConsider = this.eMsdConsider, + iMaxRulesPerNode = this.iMaxRulesPerNode, + bBuildFrontLemmatizer = this.bBuildFrontLemmatizer + }; + } + #endregion + + #region Serialization Functions (ISerializable) + public void GetObjectData(SerializationInfo info, StreamingContext context) + { + info.AddValue("bUseFromInRules", bUseFromInRules); + info.AddValue("eMsdConsider", eMsdConsider); + info.AddValue("iMaxRulesPerNode", iMaxRulesPerNode); + info.AddValue("bBuildFrontLemmatizer", bBuildFrontLemmatizer); + } + public LemmatizerSettings(SerializationInfo info, StreamingContext context) + { + bUseFromInRules = info.GetBoolean("bUseFromInRules"); + eMsdConsider = (MsdConsideration)info.GetValue("eMsdConsider", typeof(MsdConsideration)); + iMaxRulesPerNode = info.GetInt32("iMaxRulesPerNode"); + bBuildFrontLemmatizer = info.GetBoolean("bBuildFrontLemmatizer"); + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter binWrt) + { + binWrt.Write(bUseFromInRules); + binWrt.Write((int)eMsdConsider); + binWrt.Write(iMaxRulesPerNode); + binWrt.Write(bBuildFrontLemmatizer); + } + public void Deserialize(BinaryReader binRead) + { + bUseFromInRules = binRead.ReadBoolean(); + eMsdConsider = (MsdConsideration)binRead.ReadInt32(); + iMaxRulesPerNode = binRead.ReadInt32(); + bBuildFrontLemmatizer = binRead.ReadBoolean(); + } + public LemmatizerSettings(System.IO.BinaryReader binRead) + { + this.Deserialize(binRead); + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + + public void Save(Latino.BinarySerializer binWrt) { + 
binWrt.WriteBool(bUseFromInRules); + binWrt.WriteInt((int)eMsdConsider); + binWrt.WriteInt(iMaxRulesPerNode); + binWrt.WriteBool(bBuildFrontLemmatizer); + } + + public void Load(Latino.BinarySerializer binRead) { + bUseFromInRules = binRead.ReadBool(); + eMsdConsider = (MsdConsideration)binRead.ReadInt(); + iMaxRulesPerNode = binRead.ReadInt(); + bBuildFrontLemmatizer = binRead.ReadBool(); + } + + public LemmatizerSettings(Latino.BinarySerializer reader) { + Load(reader); + } + +#endif + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/RuleList.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/RuleList.cs new file mode 100644 index 0000000..fbaad6f --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/RuleList.cs @@ -0,0 +1,161 @@ +using System.Collections.Generic; +using System.IO; + +namespace LemmaSharp +{ + public class RuleList : Dictionary + { + #region Private Variables + private LemmatizerSettings lsett; + private LemmaRule lrDefaultRule; + #endregion + + #region Constructor(s) + public RuleList(LemmatizerSettings lsett) + { + this.lsett = lsett; + lrDefaultRule = AddRule(new LemmaRule("", "", 0, lsett)); + } + #endregion + + #region Public Properties + public LemmaRule DefaultRule + { + get + { + return lrDefaultRule; + } + } + #endregion + + #region Essential Class Functions + public LemmaRule AddRule(LemmaExample le) + { + return AddRule(new LemmaRule(le.Word, le.Lemma, this.Count, lsett)); + } + private LemmaRule AddRule(LemmaRule lrRuleNew) + { + LemmaRule lrRuleReturn = null; + if (!this.TryGetValue(lrRuleNew.Signature, out lrRuleReturn)) + { + lrRuleReturn = lrRuleNew; + this.Add(lrRuleReturn.Signature, lrRuleReturn); + } + return lrRuleReturn; + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter binWrt, bool bThisTopObject) + { + //save metadata + 
binWrt.Write(bThisTopObject); + + //save value types -------------------------------------- + + //save reference types if needed ------------------------- + if (bThisTopObject) + lsett.Serialize(binWrt); + + //save list items --------------------------------------- + var iCount = this.Count; + binWrt.Write(iCount); + foreach (var kvp in this) + { + binWrt.Write(kvp.Key); + kvp.Value.Serialize(binWrt, false); + } + //default rule is already saved in the list. Here just save its id. + binWrt.Write(lrDefaultRule.Signature); + } + + public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett) + { + //load metadata + var bThisTopObject = binRead.ReadBoolean(); + + //load value types -------------------------------------- + + //load reference types if needed ------------------------- + if (bThisTopObject) + this.lsett = new LemmatizerSettings(binRead); + else + this.lsett = lsett; + + //load list items --------------------------------------- + this.Clear(); + int iCount = binRead.ReadInt32(); + for (var iId = 0; iId < iCount; iId++) + { + var sKey = binRead.ReadString(); + var lrVal = new LemmaRule(binRead, this.lsett); + this.Add(sKey, lrVal); + } + + //link the default rule; just its Id (signature) was saved. 
+ lrDefaultRule = this[binRead.ReadString()]; + } + + public RuleList(System.IO.BinaryReader binRead, LemmatizerSettings lsett) + { + this.Deserialize(binRead, lsett); + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + + public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject) { + //save metadata + binWrt.WriteBool(bThisTopObject); + + //save value types -------------------------------------- + + //save reference types if needed ------------------------- + if (bThisTopObject) + lsett.Save(binWrt); + + //save list items --------------------------------------- + int iCount = this.Count; + binWrt.WriteInt(iCount); + foreach (KeyValuePair kvp in this) { + binWrt.WriteString(kvp.Key); + kvp.Value.Save(binWrt, false); + } + + //default rule is already saved in the list. Here just save its id. + binWrt.WriteString(lrDefaultRule.Signature); + } + public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett) { + //load metadata + bool bThisTopObject = binRead.ReadBool(); + + //load value types -------------------------------------- + + //load reference types if needed ------------------------- + if (bThisTopObject) + this.lsett = new LemmatizerSettings(binRead); + else + this.lsett = lsett; + + //load list items --------------------------------------- + this.Clear(); + int iCount = binRead.ReadInt(); + for (int iId = 0; iId < iCount; iId++) { + string sKey = binRead.ReadString(); + LemmaRule lrVal = new LemmaRule(binRead, this.lsett); + this.Add(sKey, lrVal); + } + + //link the default rule; just its Id (signature) was saved. 
+ lrDefaultRule = this[binRead.ReadString()]; + + } + public RuleList(Latino.BinarySerializer binRead, LemmatizerSettings lsett) { + Load(binRead, lsett); + } + +#endif + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/RuleWeighted.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/RuleWeighted.cs new file mode 100644 index 0000000..c4d332b --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Classes/RuleWeighted.cs @@ -0,0 +1,50 @@ +using System; + +namespace LemmaSharp +{ + [Serializable] + class RuleWeighted : IComparable + { + #region Private Variables + private LemmaRule lrRule; + private double dWeight; + #endregion + + #region Constructor(s) + public RuleWeighted(LemmaRule lrRule, double dWeight) + { + this.lrRule = lrRule; + this.dWeight = dWeight; + } + #endregion + + #region Public Properties + public LemmaRule Rule + { + get { return lrRule; } + } + public double Weight + { + get { return dWeight; } + } + #endregion + + #region Essential Class Functions (comparing objects, eg.: for sorting) + public int CompareTo(RuleWeighted rl) + { + if (this.dWeight < rl.dWeight) return 1; + if (this.dWeight > rl.dWeight) return -1; + if (this.lrRule.Id < rl.lrRule.Id) return 1; + if (this.lrRule.Id > rl.lrRule.Id) return -1; + return 0; + } + #endregion + + #region Output & Serialization Functions + public override string ToString() + { + return string.Format("{0}{1:(0.00%)}", lrRule, dWeight); + } + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/ExternalLibs/7zipSources.7z b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/ExternalLibs/7zipSources.7z new file mode 100644 index 0000000..5463ceb Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/ExternalLibs/7zipSources.7z 
differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/ExternalLibs/Lzma#.dll b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/ExternalLibs/Lzma#.dll new file mode 100644 index 0000000..2bb9990 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/ExternalLibs/Lzma#.dll differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Interfaces/ILemmatizer.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Interfaces/ILemmatizer.cs new file mode 100644 index 0000000..50c13a0 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Interfaces/ILemmatizer.cs @@ -0,0 +1,9 @@ +using System.Runtime.Serialization; + +namespace LemmaSharp +{ + public interface ILemmatizer : ISerializable + { + string Lemmatize(string sWord); + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Interfaces/ILemmatizerModel.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Interfaces/ILemmatizerModel.cs new file mode 100644 index 0000000..367203c --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Interfaces/ILemmatizerModel.cs @@ -0,0 +1,8 @@ +namespace LemmaSharp +{ + public interface ILemmatizerModel + { + string Lemmatize(string sWord); + string ToString(); + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Interfaces/ILemmatizerTrainable.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Interfaces/ILemmatizerTrainable.cs new file mode 100644 index 0000000..cfd18ab --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/Interfaces/ILemmatizerTrainable.cs @@ -0,0 +1,12 @@ +namespace LemmaSharp +{ + public interface 
ITrainableLemmatizer : ILemmatizer + { + ExampleList Examples { get; } + ILemmatizerModel Model { get; } + void AddExample(string sWord, string sLemma); + void AddExample(string sWord, string sLemma, double dWeight); + void AddExample(string sWord, string sLemma, double dWeight, string sMsd); + void BuildModel(); + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/LatinoCompatibility/BinarySerializer.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/LatinoCompatibility/BinarySerializer.cs new file mode 100644 index 0000000..74effcb --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/LatinoCompatibility/BinarySerializer.cs @@ -0,0 +1,539 @@ +/*==========================================================================; + * + * (c) 2004-08 JSI. All rights reserved. + * + * File: BinarySerializer.cs + * Version: 1.0 + * Desc: Binary serializer + * Author: Miha Grcar + * Created on: Oct-2004 + * Last modified: May-2008 + * Revision: May-2008 + * + ***************************************************************************/ + +//Remark: Use this file as Latino compatibility checker. When it is included in +// the project it defines symbol LATINO, that should enable all Latino specific +// serialization functions. When excluded, this code will not be created and also +// following Latino namspace will not be added to the project. 
+ + +using System; +using System.Runtime.InteropServices; +using System.Collections.Generic; +using System.Reflection; +using System.Text; +using System.IO; + +#if LATINO + +namespace Latino +{ + /* .----------------------------------------------------------------------- + | + | Class BinarySerializer + | + '----------------------------------------------------------------------- + */ + public interface ISerializable { + // *** note that you need to implement a constructor that loads the instance if the class implements Latino.ISerializable + void Save(Latino.BinarySerializer writer); + } + + public class BinarySerializer + { + private static Dictionary m_full_to_short_type_name + = new Dictionary(); + private static Dictionary m_short_to_full_type_name + = new Dictionary(); + private Stream m_stream; + private string m_data_dir + = "."; + private static void RegisterTypeName(string full_type_name, string short_type_name) + { + m_full_to_short_type_name.Add(full_type_name, short_type_name); + m_short_to_full_type_name.Add(short_type_name, full_type_name); + } + private static string GetFullTypeName(string short_type_name) + { + return m_short_to_full_type_name.ContainsKey(short_type_name) ? m_short_to_full_type_name[short_type_name] : short_type_name; + } + private static string GetShortTypeName(string full_type_name) + { + return m_full_to_short_type_name.ContainsKey(full_type_name) ? 
m_full_to_short_type_name[full_type_name] : full_type_name; + } + static BinarySerializer() + { + RegisterTypeName(typeof(bool).AssemblyQualifiedName, "b"); + RegisterTypeName(typeof(byte).AssemblyQualifiedName, "ui1"); + RegisterTypeName(typeof(sbyte).AssemblyQualifiedName, "i1"); + RegisterTypeName(typeof(char).AssemblyQualifiedName, "c"); + RegisterTypeName(typeof(double).AssemblyQualifiedName, "f8"); + RegisterTypeName(typeof(float).AssemblyQualifiedName, "f4"); + RegisterTypeName(typeof(int).AssemblyQualifiedName, "i4"); + RegisterTypeName(typeof(uint).AssemblyQualifiedName, "ui4"); + RegisterTypeName(typeof(long).AssemblyQualifiedName, "i8"); + RegisterTypeName(typeof(ulong).AssemblyQualifiedName, "ui8"); + RegisterTypeName(typeof(short).AssemblyQualifiedName, "i2"); + RegisterTypeName(typeof(ushort).AssemblyQualifiedName, "ui2"); + RegisterTypeName(typeof(string).AssemblyQualifiedName, "s"); + } + public BinarySerializer(Stream stream) + { + //Utils.ThrowException(stream == null ? new ArgumentNullException("stream") : null); + m_stream = stream; + } + public BinarySerializer() + { + m_stream = new MemoryStream(); + } + public BinarySerializer(string file_name, FileMode file_mode) + { + m_stream = new FileStream(file_name, file_mode); // throws ArgumentException, NotSupportedException, ArgumentNullException, SecurityException, FileNotFoundException, IOException, DirectoryNotFoundException, PathTooLongException, ArgumentOutOfRangeException + } + // *** Reading *** + private byte[] Read() // Read() is directly or indirectly called from several methods thus exceptions thrown here can also be thrown in all those methods + { + int sz = Marshal.SizeOf(typeof(T)); + byte[] buffer = new byte[sz]; + int num_bytes = m_stream.Read(buffer, 0, sz); // throws IOException, NotSupportedException, ObjectDisposedException + //Utils.ThrowException(num_bytes < sz ? 
new EndOfStreamException() : null); + return buffer; + } + public bool ReadBool() + { + return ReadByte() != 0; + } + public byte ReadByte() // ReadByte() is directly or indirectly called from several methods thus exceptions thrown here can also be thrown in all those methods + { + int val = m_stream.ReadByte(); // throws NotSupportedException, ObjectDisposedException + //Utils.ThrowException(val < 0 ? new EndOfStreamException() : null); + return (byte)val; + } + public sbyte ReadSByte() + { + return (sbyte)ReadByte(); + } + private char ReadChar8() + { + return (char)ReadByte(); + } + private char ReadChar16() + { + return BitConverter.ToChar(Read(), 0); + } + public char ReadChar() + { + return ReadChar16(); + } + public double ReadDouble() + { + return BitConverter.ToDouble(Read(), 0); + } + public float ReadFloat() + { + return BitConverter.ToSingle(Read(), 0); + } + public int ReadInt() + { + return BitConverter.ToInt32(Read(), 0); + } + public uint ReadUInt() + { + return BitConverter.ToUInt32(Read(), 0); + } + public long ReadLong() + { + return BitConverter.ToInt64(Read(), 0); + } + public ulong ReadULong() + { + return BitConverter.ToUInt64(Read(), 0); + } + public short ReadShort() + { + return BitConverter.ToInt16(Read(), 0); + } + public ushort ReadUShort() + { + return BitConverter.ToUInt16(Read(), 0); + } + private string ReadString8() + { + int len = ReadInt(); + if (len < 0) { return null; } + byte[] buffer = new byte[len]; + m_stream.Read(buffer, 0, len); // throws IOException, NotSupportedException, ObjectDisposedException + return Encoding.ASCII.GetString(buffer); + } + private string ReadString16() + { + int len = ReadInt(); + if (len < 0) { return null; } + byte[] buffer = new byte[len * 2]; + m_stream.Read(buffer, 0, len * 2); // throws IOException, NotSupportedException, ObjectDisposedException + return Encoding.Unicode.GetString(buffer); + } + public string ReadString() + { + return ReadString16(); // throws exceptions (see ReadString16()) + 
} + public Type ReadType() + { + string type_name = ReadString8(); // throws exceptions (see ReadString8()) + //Utils.ThrowException(type_name == null ? new InvalidDataException() : null); + return Type.GetType(GetFullTypeName(type_name)); // throws TargetInvocationException, ArgumentException, TypeLoadException, FileNotFoundException, FileLoadException, BadImageFormatException + } + public ValueType ReadValue(Type type) + { + //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null); + //Utils.ThrowException(!type.IsValueType ? new InvalidArgumentValueException("type") : null); + if (type == typeof(bool)) + { + return ReadBool(); + } + else if (type == typeof(byte)) + { + return ReadByte(); + } + else if (type == typeof(sbyte)) + { + return ReadSByte(); + } + else if (type == typeof(char)) + { + return ReadChar(); + } + else if (type == typeof(double)) + { + return ReadDouble(); + } + else if (type == typeof(float)) + { + return ReadFloat(); + } + else if (type == typeof(int)) + { + return ReadInt(); + } + else if (type == typeof(uint)) + { + return ReadUInt(); + } + else if (type == typeof(long)) + { + return ReadLong(); + } + else if (type == typeof(ulong)) + { + return ReadULong(); + } + else if (type == typeof(short)) + { + return ReadShort(); + } + else if (type == typeof(ushort)) + { + return ReadUShort(); + } + else if (typeof(Latino.ISerializable).IsAssignableFrom(type)) + { + ConstructorInfo cxtor = type.GetConstructor(new Type[] { typeof(Latino.BinarySerializer) }); + //Utils.ThrowException(cxtor == null ? 
new ArgumentNotSupportedException("type") : null); + return (ValueType)cxtor.Invoke(new object[] { this }); // throws MemberAccessException, MethodAccessException, TargetInvocationException, NotSupportedException, SecurityException + } + else + { + //throw new ArgumentNotSupportedException("type"); + throw new Exception("type"); + } + } + public T ReadValue() + { + return (T)(object)ReadValue(typeof(T)); // throws exceptions (see ReadValue(Type type)) + } + public object ReadObject(Type type) + { + //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null); + switch (ReadByte()) + { + case 0: + return null; + case 1: + break; + case 2: + Type type_0 = ReadType(); // throws exceptions (see ReadType()) + //Utils.ThrowException(type_0 == null ? new TypeLoadException() : null); + //Utils.ThrowException(!type.IsAssignableFrom(type_0) ? new InvalidArgumentValueException("type") : null); + type = type_0; + break; + default: + throw new InvalidDataException(); + } + if (type == typeof(string)) + { + return ReadString(); + } + else if (typeof(Latino.ISerializable).IsAssignableFrom(type)) + { + ConstructorInfo cxtor = type.GetConstructor(new Type[] { typeof(Latino.BinarySerializer) }); + //Utils.ThrowException(cxtor == null ? new ArgumentNotSupportedException("type") : null); + return cxtor.Invoke(new object[] { this }); // throws MemberAccessException, MethodAccessException, TargetInvocationException, NotSupportedException, SecurityException + } + else if (type.IsValueType) + { + return ReadValue(type); // throws exceptions (see ReadValue(Type type)) + } + else + { + //throw new InvalidArgumentValueException("type"); + throw new Exception("type"); + } + } + public T ReadObject() + { + return (T)ReadObject(typeof(T)); // throws exceptions (see ReadObject(Type type)) + } + public object ReadValueOrObject(Type type) + { + //Utils.ThrowException(type == null ? 
new ArgumentNullException("type") : null); + if (type.IsValueType) + { + return ReadValue(type); // throws exceptions (see ReadValue(Type type)) + } + else + { + return ReadObject(type); // throws exceptions (see ReadObject(Type type)) + } + } + public T ReadValueOrObject() + { + return (T)ReadValueOrObject(typeof(T)); // throws exceptions (see ReadValueOrObject(Type type)) + } + // *** Writing *** + private void Write(byte[] data) // Write(byte[] data) is directly or indirectly called from several methods thus exceptions thrown here can also be thrown in all those methods + { + m_stream.Write(data, 0, data.Length); // throws IOException, NotSupportedException, ObjectDisposedException + } + public void WriteBool(bool val) + { + WriteByte(val ? (byte)1 : (byte)0); + } + public void WriteByte(byte val) // WriteByte(byte val) is directly or indirectly called from several methods thus exceptions thrown here can also be thrown in all those methods + { + m_stream.WriteByte(val); // throws IOException, NotSupportedException, ObjectDisposedException + } + public void WriteSByte(sbyte val) + { + WriteByte((byte)val); + } + private void WriteChar8(char val) + { + WriteByte(Encoding.ASCII.GetBytes(new char[] { val })[0]); + } + private void WriteChar16(char val) + { + Write(BitConverter.GetBytes((ushort)val)); + } + public void WriteChar(char val) + { + WriteChar16(val); + } + public void WriteDouble(double val) + { + Write(BitConverter.GetBytes(val)); + } + public void WriteFloat(float val) + { + Write(BitConverter.GetBytes(val)); + } + public void WriteInt(int val) + { + Write(BitConverter.GetBytes(val)); + } + public void WriteUInt(uint val) + { + Write(BitConverter.GetBytes(val)); + } + public void WriteLong(long val) + { + Write(BitConverter.GetBytes(val)); + } + public void WriteULong(ulong val) + { + Write(BitConverter.GetBytes(val)); + } + public void WriteShort(short val) + { + Write(BitConverter.GetBytes(val)); + } + public void WriteUShort(ushort val) + { + 
Write(BitConverter.GetBytes(val)); + } + private void WriteString8(string val) + { + if (val == null) { WriteInt(-1); return; } + WriteInt(val.Length); + Write(Encoding.ASCII.GetBytes(val)); + } + private void WriteString16(string val) + { + if (val == null) { WriteInt(-1); return; } + WriteInt(val.Length); + Write(Encoding.Unicode.GetBytes(val)); + } + public void WriteString(string val) + { + WriteString16(val); + } + public void WriteValue(ValueType val) + { + if (val is bool) + { + WriteBool((bool)val); + } + else if (val is byte) + { + WriteByte((byte)val); + } + else if (val is sbyte) + { + WriteSByte((sbyte)val); + } + else if (val is char) + { + WriteChar((char)val); + } + else if (val is double) + { + WriteDouble((double)val); + } + else if (val is float) + { + WriteFloat((float)val); + } + else if (val is int) + { + WriteInt((int)val); + } + else if (val is uint) + { + WriteUInt((uint)val); + } + else if (val is long) + { + WriteLong((long)val); + } + else if (val is ulong) + { + WriteULong((ulong)val); + } + else if (val is short) + { + WriteShort((short)val); + } + else if (val is ushort) + { + WriteUShort((ushort)val); + } + else if (val is Latino.ISerializable) + { + ((Latino.ISerializable)val).Save(this); // throws serialization-related exceptions + } + else + { + //throw new ArgumentTypeException("val"); + } + } + public void WriteObject(Type type, object obj) + { + //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null); + //Utils.ThrowException((obj != null && !type.IsAssignableFrom(obj.GetType())) ? 
new ArgumentTypeException("obj") : null); + if (obj == null) + { + WriteByte(0); + } + else + { + Type obj_type = obj.GetType(); + if (obj_type == type) + { + WriteByte(1); + } + else + { + WriteByte(2); + WriteType(obj_type); + } + if (obj is string) + { + WriteString((string)obj); + } + else if (obj is Latino.ISerializable) + { + ((Latino.ISerializable)obj).Save(this); // throws serialization-related exceptions + } + else if (obj is ValueType) + { + WriteValue((ValueType)obj); // throws exceptions (see WriteValue(ValueType val)) + } + else + { + //throw new ArgumentTypeException("obj"); + } + } + } + public void WriteObject(T obj) + { + WriteObject(typeof(T), obj); // throws exceptions (see WriteObject(Type type, object obj)) + } + public void WriteValueOrObject(Type type, object obj) + { + //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null); + //Utils.ThrowException(!type.IsAssignableFrom(obj.GetType()) ? new ArgumentTypeException("obj") : null); + if (type.IsValueType) + { + WriteValue((ValueType)obj); // throws exceptions (see WriteValue(ValueType val)) + } + else + { + WriteObject(type, obj); // throws exceptions (see WriteObject(Type type, object obj)) + } + } + public void WriteValueOrObject(T obj) + { + WriteValueOrObject(typeof(T), obj); // throws exceptions (see WriteValueOrObject(Type type, object obj)) + } + public void WriteType(Type type) + { + //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null); + WriteString8(GetShortTypeName(type.AssemblyQualifiedName)); + } + // *** Data directory *** + public string DataDir + { + get { return m_data_dir; } + set + { + //Utils.ThrowException(!Utils.VerifyPathName(value, /*must_exist=*/true) ? 
new InvalidArgumentValueException("DataDir") : null); + m_data_dir = value; + } + } + // *** Access to the associated stream *** + public void Close() + { + m_stream.Close(); + } + public void Flush() + { + m_stream.Flush(); // throws IOException + } + public Stream Stream + { + get { return m_stream; } + } + } +} + +#endif diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/LemmaSharp.csproj b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/LemmaSharp.csproj new file mode 100644 index 0000000..5a58d6e --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharp/LemmaSharp.csproj @@ -0,0 +1,165 @@ + + + + Debug + AnyCPU + 9.0.21022 + 2.0 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4} + Library + Properties + LemmaSharp + LemmaSharp + v4.5 + 512 + true + + + + + + + 3.5 + + http://localhost/LemmaSharp/ + true + Web + true + Foreground + 7 + Days + false + false + true + 0 + 1.0.0.%2a + false + true + + + true + full + false + bin\Debug\ + TRACE;DEBUG;NOLATINO + prompt + 4 + false + + + pdbonly + true + bin\Release\ + TRACE;NOLATINO + prompt + 4 + false + + + true + bin\x86\Debug\ + DEBUG;TRACE + full + x86 + true + GlobalSuppressions.cs + prompt + false + + + bin\x86\Release\ + TRACE + true + pdbonly + x86 + true + GlobalSuppressions.cs + prompt + false + + + true + bin\x64\Debug\ + DEBUG;TRACE + full + x64 + true + GlobalSuppressions.cs + prompt + false + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + true + GlobalSuppressions.cs + prompt + false + + + + False + ExternalLibs\Lzma#.dll + + + + + + + + Code + + + + + + + + + + + + + + + + False + .NET Framework Client Profile + false + + + False + .NET Framework 2.0 %28x86%29 + true + + + False + .NET Framework 3.0 %28x86%29 + false + + + False + .NET Framework 3.5 + false + + + False + .NET Framework 3.5 SP1 + false + + + + + + + + \ No newline at end of file diff --git 
namespace LemmaSharp
{
    // Languages with prebuilt lemmatizer models. The enum value is used as an
    // index into LemmatizerPrebuilt.asLangMapping (two table entries per
    // language), so the order here must not change.
    public enum LanguagePrebuilt
    {
        //from Multext-East v4 lexicons
        Bulgarian,
        Czech,
        English,
        Estonian,
        Persian,
        French,
        Hungarian,
        Macedonian,
        Polish,
        Romanian,
        Russian,
        Slovak,
        Slovene,
        Serbian,
        Ukrainian,
        //from Multext lexicons
        EnglishMT,
        FrenchMT,
        German,
        Italian,
        Spanish,
    }
}

namespace LemmaSharp
{
    using System;
    using System.IO;
    using System.Reflection;
    using System.Runtime.Serialization;

    /// <summary>
    /// Base class for lemmatizers shipped with prebuilt language models that are
    /// embedded as assembly resources. Concrete subclasses say which assembly
    /// carries the resources (see GetExecutingAssembly).
    /// </summary>
    [Serializable]
    public abstract class LemmatizerPrebuilt : Lemmatizer
    {
        #region Private Variables
        // Pairs of (ISO-ish language code, lexicon family), indexed by
        // (int)LanguagePrebuilt * 2 — the order must match the enum.
        private static string[] asLangMapping = new string[] {
            "bg", "mlteast",
            "cs", "mlteast",
            "en", "mlteast",
            "et", "mlteast",
            "fa", "mlteast",
            "fr", "mlteast",
            "hu", "mlteast",
            "mk", "mlteast",
            "pl", "mlteast",
            "ro", "mlteast",
            "ru", "mlteast",
            "sk", "mlteast",
            "sl", "mlteast",
            "sr", "mlteast",
            "uk", "mlteast",
            "en", "multext",
            "fr", "multext",
            "ge", "multext",
            "it", "multext",
            "sp", "multext",
        };
        private LanguagePrebuilt lang;
        #endregion

        #region Constructor(s)

        public LemmatizerPrebuilt(LanguagePrebuilt lang)
            : base()
        {
            this.lang = lang;
        }

        public LemmatizerPrebuilt(LanguagePrebuilt lang, LemmatizerSettings lsett)
            : base(lsett)
        {
            this.lang = lang;
        }

        #endregion

        #region Private Properties Helping Functions
        protected string GetResourceFileName(string sFileMask)
        {
            return GetResourceFileName(sFileMask, lang);
        }

        /// <summary>
        /// Expands a file mask such as "compact7z-{0}.lem" into the concrete
        /// resource name, e.g. "compact7z-mlteast-en.lem".
        /// </summary>
        public static string GetResourceFileName(string sFileMask, LanguagePrebuilt lang)
        {
            string langFileName = asLangMapping[(int)lang * 2 + 1] + '-' + asLangMapping[(int)lang * 2];
            return string.Format(sFileMask, langFileName);
        }
        #endregion

        #region Public Properties
        public LanguagePrebuilt Language
        {
            get
            {
                return lang;
            }
        }
        public LexiconPrebuilt Lexicon
        {
            get
            {
                return GetLexicon(lang);
            }
        }

        /// <summary>Maps a prebuilt language to its lexicon family.</summary>
        public static LexiconPrebuilt GetLexicon(LanguagePrebuilt lang)
        {
            // Case-insensitive parse: the table stores "mlteast"/"multext",
            // the enum members are MltEast/Multext.
            return (LexiconPrebuilt)Enum.Parse(typeof(LexiconPrebuilt), asLangMapping[((int)lang) * 2 + 1], true);
        }
        #endregion

        #region Resource Management Functions
        // Must return the assembly whose manifest carries the .lem resources.
        protected abstract Assembly GetExecutingAssembly();

        /// <summary>
        /// Finds the manifest resource whose name ends with the given short name.
        /// Returns null when no such resource exists.
        /// </summary>
        protected Stream GetResourceStream(string sResourceShortName)
        {
            var assembly = GetExecutingAssembly();
            string sResourceName = null;
            foreach (var sResource in assembly.GetManifestResourceNames())
            {
                // BUGFIX: resource names are ordinal identifiers; the culture-sensitive
                // EndsWith(string) overload can mis-match under some locales
                // (e.g. Turkish casing rules), so compare ordinally.
                if (sResource.EndsWith(sResourceShortName, StringComparison.Ordinal))
                {
                    sResourceName = sResource;
                    break;
                }
            }
            if (String.IsNullOrEmpty(sResourceName))
                return null;
            return assembly.GetManifestResourceStream(sResourceName);
        }
        #endregion

        #region Serialization Functions
        public LemmatizerPrebuilt(SerializationInfo info, StreamingContext context)
            : base(info, context)
        {
        }
        #endregion
    }
}
b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Classes/LemmatizerPrebuiltCompact.cs new file mode 100644 index 0000000..c51b176 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Classes/LemmatizerPrebuiltCompact.cs @@ -0,0 +1,29 @@ +using System; +using System.IO; +using System.Reflection; + +namespace LemmaSharp +{ + [Serializable] + public class LemmatizerPrebuiltCompact : LemmatizerPrebuilt + { + public const string FILEMASK = "compact7z-{0}.lem"; + + #region Constructor(s) & Destructor(s) + public LemmatizerPrebuiltCompact(LanguagePrebuilt lang) + : base(lang) + { + Stream stream = GetResourceStream(GetResourceFileName(FILEMASK)); + this.Deserialize(stream); + stream.Close(); + } + #endregion + + #region Resource Management Functions + protected override Assembly GetExecutingAssembly() + { + return Assembly.GetExecutingAssembly(); + } + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Classes/LexiconPrebuilt.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Classes/LexiconPrebuilt.cs new file mode 100644 index 0000000..1a432fc --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Classes/LexiconPrebuilt.cs @@ -0,0 +1,8 @@ +namespace LemmaSharp +{ + public enum LexiconPrebuilt + { + MltEast, + Multext + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-bg.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-bg.lem new file mode 100644 index 0000000..61bd81f Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-bg.lem differ diff --git 
a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-cs.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-cs.lem new file mode 100644 index 0000000..04dd498 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-cs.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-en.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-en.lem new file mode 100644 index 0000000..bd1998e Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-en.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-et.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-et.lem new file mode 100644 index 0000000..a43d653 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-et.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-fa.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-fa.lem new file mode 100644 index 0000000..7bbd004 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-fa.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-fr.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-fr.lem new 
file mode 100644 index 0000000..67e37a2 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-fr.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-hu.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-hu.lem new file mode 100644 index 0000000..f679405 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-hu.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-mk.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-mk.lem new file mode 100644 index 0000000..1d01efb Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-mk.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-pl.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-pl.lem new file mode 100644 index 0000000..6c3f46d Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-pl.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-ro.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-ro.lem new file mode 100644 index 0000000..660b689 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-ro.lem differ diff --git 
a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-ru.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-ru.lem new file mode 100644 index 0000000..181e0f0 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-ru.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-sk.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-sk.lem new file mode 100644 index 0000000..5db7955 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-sk.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-sl.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-sl.lem new file mode 100644 index 0000000..1761a72 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-sl.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-sr.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-sr.lem new file mode 100644 index 0000000..3cbe416 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-sr.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-uk.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-uk.lem new 
file mode 100644 index 0000000..8e28496 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-mlteast-uk.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-multext-en.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-multext-en.lem new file mode 100644 index 0000000..a4e1da9 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-multext-en.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-multext-fr.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-multext-fr.lem new file mode 100644 index 0000000..6c8ee18 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-multext-fr.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-multext-ge.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-multext-ge.lem new file mode 100644 index 0000000..576b2b8 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-multext-ge.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-multext-it.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-multext-it.lem new file mode 100644 index 0000000..8cf6f74 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-multext-it.lem differ diff --git 
a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-multext-sp.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-multext-sp.lem new file mode 100644 index 0000000..87fae81 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/Data/compact7z-multext-sp.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/LemmaSharpPrebuilt.csproj b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/LemmaSharpPrebuilt.csproj new file mode 100644 index 0000000..de2625e --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/LemmaSharpPrebuilt.csproj @@ -0,0 +1,132 @@ + + + + Debug + AnyCPU + 9.0.21022 + 2.0 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564} + Library + Properties + LemmaSharp + LemmaSharpPrebuilt + v4.5 + 512 + + + + + 3.5 + publish\ + true + Disk + false + Foreground + 7 + Days + false + false + true + 0 + 1.0.0.%2a + false + false + true + + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + false + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + false + + + true + bin\x86\Debug\ + DEBUG;TRACE + full + x86 + true + GlobalSuppressions.cs + prompt + false + + + bin\x86\Release\ + TRACE + true + pdbonly + x86 + true + GlobalSuppressions.cs + prompt + false + + + true + bin\x64\Debug\ + DEBUG;TRACE + full + x64 + true + GlobalSuppressions.cs + prompt + false + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + true + GlobalSuppressions.cs + prompt + false + + + + + + + + + + + + + + + False + .NET Framework 3.5 SP1 + true + + + + + {a39293c1-92d8-47b9-93a4-41f443b4f9e4} + LemmaSharp + + + + + \ No newline at end of file diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/LemmaSharpPrebuiltCompact.csproj 
b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/LemmaSharpPrebuiltCompact.csproj new file mode 100644 index 0000000..67bca39 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuilt/LemmaSharpPrebuiltCompact.csproj @@ -0,0 +1,111 @@ + + + + Debug + AnyCPU + 9.0.21022 + 2.0 + {9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A} + Library + Properties + LemmaSharpPrebuiltCompact + LemmaSharpPrebuiltCompact + v4.5 + 512 + + + + + 3.5 + + publish\ + true + Disk + false + Foreground + 7 + Days + false + false + true + 0 + 1.0.0.%2a + false + false + true + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + false + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + false + + + + + + + + + {A39293C1-92D8-47B9-93A4-41F443B4F9E4} + LemmaSharp + + + {1E700D21-62D3-4525-93FE-C1FB0A1B0564} + LemmaSharpPrebuilt + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + False + .NET Framework 3.5 SP1 + true + + + + + \ No newline at end of file diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuiltCompact.sln b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuiltCompact.sln new file mode 100644 index 0000000..a9502af --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltCompact/LemmaSharpPrebuiltCompact.sln @@ -0,0 +1,58 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.25420.1 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharp", "LemmaSharp\LemmaSharp.csproj", "{A39293C1-92D8-47B9-93A4-41F443B4F9E4}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharpPrebuiltCompact", "LemmaSharpPrebuilt\LemmaSharpPrebuiltCompact.csproj", "{9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = 
"LemmaSharpPrebuilt", "LemmaSharpPrebuilt\LemmaSharpPrebuilt.csproj", "{1E700D21-62D3-4525-93FE-C1FB0A1B0564}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x64.ActiveCfg = Debug|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x64.Build.0 = Debug|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x86.ActiveCfg = Debug|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x86.Build.0 = Debug|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|Any CPU.Build.0 = Release|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x64.ActiveCfg = Release|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x64.Build.0 = Release|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x86.ActiveCfg = Release|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x86.Build.0 = Release|x86 + {9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Debug|x64.ActiveCfg = Debug|Any CPU + {9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Debug|x86.ActiveCfg = Debug|Any CPU + {9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Release|Any CPU.Build.0 = Release|Any CPU + {9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Release|x64.ActiveCfg = Release|Any CPU + {9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Release|x86.ActiveCfg = 
Release|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x64.ActiveCfg = Debug|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x64.Build.0 = Debug|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x86.ActiveCfg = Debug|x86 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x86.Build.0 = Debug|x86 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|Any CPU.Build.0 = Release|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x64.ActiveCfg = Release|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x64.Build.0 = Release|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x86.ActiveCfg = Release|x86 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x86.Build.0 = Release|x86 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/ExampleList.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/ExampleList.cs new file mode 100644 index 0000000..d6b87b5 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/ExampleList.cs @@ -0,0 +1,381 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Runtime.Serialization; +using System.Text; + +namespace LemmaSharp +{ + [Serializable] + public class ExampleList : ISerializable + { + #region Private Variables + private LemmatizerSettings lsett; + private RuleList rlRules; + private Dictionary dictExamples; + private List lstExamples; + #endregion + + #region Constructor(s) + public ExampleList(LemmatizerSettings lsett) : base() + { + this.lsett = lsett; + this.dictExamples = new Dictionary(); + 
this.lstExamples = null; + this.rlRules = new RuleList(lsett); + } + public ExampleList(StreamReader srIn, string sFormat, LemmatizerSettings lsett) : this(lsett) + { + AddMultextFile(srIn, sFormat); + } + #endregion + + #region Public Properties & Indexers + public LemmaExample this[int i] + { + get + { + if (lstExamples == null) + FinalizeAdditions(); + return lstExamples[i]; + } + } + public int Count + { + get + { + if (lstExamples == null) + FinalizeAdditions(); + return lstExamples.Count; + } + } + public double WeightSum + { + get + { + if (lstExamples == null) + FinalizeAdditions(); + + double dWeight = 0; + foreach (LemmaExample exm in lstExamples) + dWeight += exm.Weight; + return dWeight; + } + } + public RuleList Rules + { + get + { + return rlRules; + } + } + public List ListExamples + { + get + { + if (lstExamples == null) + FinalizeAdditions(); + return lstExamples; + } + } + #endregion + + #region Essential Class Functions (adding/removing examples) + public void AddMultextFile(StreamReader srIn, string sFormat) + { + //read from file + string sLine = null; + int iError = 0; + int iLine = 0; + var iW = sFormat.IndexOf('W'); + var iL = sFormat.IndexOf('L'); + var iM = sFormat.IndexOf('M'); + var iF = sFormat.IndexOf('F'); + var iLen = Math.Max(Math.Max(iW, iL), Math.Max(iM, iF)) + 1; + + if (iW < 0 || iL < 0) + { + throw new Exception("Can not find word and lemma location in the format specification"); + } + while ((sLine = srIn.ReadLine()) != null && iError < 50) + { + iLine++; + string[] asWords = sLine.Split(new char[] { '\t' }); + if (asWords.Length < iLen) + { + //Console.WriteLine("ERROR: Line doesn't confirm to the given format \"" + sFormat + "\"! 
Line " + iLine.ToString() + "."); + iError++; + continue; + } + var sWord = asWords[iW]; + var sLemma = asWords[iL]; + if (sLemma.Equals("=", StringComparison.Ordinal)) + sLemma = sWord; + string sMsd = null; + if (iM > -1) + sMsd = asWords[iM]; + double dWeight = 1; ; + if (iF > -1) + Double.TryParse(asWords[iM], out dWeight); + AddExample(sWord, sLemma, dWeight, sMsd); + } + if (iError == 50) + throw new Exception("Parsing stopped because of too many (50) errors. Check format specification"); + } + + public LemmaExample AddExample(string sWord, string sLemma, double dWeight, string sMsd) + { + string sNewMsd = lsett.eMsdConsider != LemmatizerSettings.MsdConsideration.Ignore + ? sMsd + : null; + var leNew = new LemmaExample(sWord, sLemma, dWeight, sNewMsd, rlRules, lsett); + return Add(leNew); + } + + private LemmaExample Add(LemmaExample leNew) + { + LemmaExample leReturn = null; + if (!dictExamples.TryGetValue(leNew.Signature, out leReturn)) + { + leReturn = leNew; + dictExamples.Add(leReturn.Signature, leReturn); + } + else + leReturn.Join(leNew); + lstExamples = null; + return leReturn; + } + public void DropExamples() + { + dictExamples.Clear(); + lstExamples = null; + } + public void FinalizeAdditions() + { + if (lstExamples != null) + return; + lstExamples = new List(dictExamples.Values); + lstExamples.Sort(); + } + public ExampleList GetFrontRearExampleList(bool front) + { + var elExamplesNew = new ExampleList(lsett); + foreach (var le in this.ListExamples) + { + if (front) + elExamplesNew.AddExample(le.WordFront, le.LemmaFront, le.Weight, le.Msd); + else + elExamplesNew.AddExample(le.WordRear, le.LemmaRear, le.Weight, le.Msd); + } + elExamplesNew.FinalizeAdditions(); + return elExamplesNew; + } + #endregion + + #region Output Functions (ToString) + public override string ToString() + { + var sb = new StringBuilder(); + foreach (var exm in lstExamples) + { + sb.AppendLine(exm.ToString()); + } + return sb.ToString(); + } + #endregion + + #region 
Serialization Functions (.Net Default - ISerializable) + public void GetObjectData(SerializationInfo info, StreamingContext context) + { + info.AddValue("lsett", lsett); + info.AddValue("iNumExamples", dictExamples.Count); + var aWords = new string[dictExamples.Count]; + var aLemmas = new string[dictExamples.Count]; + var aWeights = new double[dictExamples.Count]; + var aMsds = new string[dictExamples.Count]; + int iExm = 0; + foreach (var exm in dictExamples.Values) + { + aWords[iExm] = exm.Word; + aLemmas[iExm] = exm.Lemma; + aWeights[iExm] = exm.Weight; + aMsds[iExm] = exm.Msd; + iExm++; + } + info.AddValue("aWords", aWords); + info.AddValue("aLemmas", aLemmas); + info.AddValue("aWeights", aWeights); + info.AddValue("aMsds", aMsds); + } + public ExampleList(SerializationInfo info, StreamingContext context) + { + lsett = (LemmatizerSettings)info.GetValue("lsett", typeof(LemmatizerSettings)); + this.dictExamples = new Dictionary(); + this.lstExamples = null; + this.rlRules = new RuleList(lsett); + var aWords = (string[])info.GetValue("aWords", typeof(string[])); + var aLemmas = (string[])info.GetValue("aLemmas", typeof(string[])); + var aWeights = (double[])info.GetValue("aWeights", typeof(double[])); + var aMsds = (string[])info.GetValue("aMsds", typeof(string[])); + for (int iExm = 0; iExm < aWords.Length; iExm++) + AddExample(aWords[iExm], aLemmas[iExm], aWeights[iExm], aMsds[iExm]); + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter binWrt, bool bSerializeExamples, bool bThisTopObject) + { + //save metadata + binWrt.Write(bThisTopObject); + + //save refernce types if needed ------------------------- + if (bThisTopObject) + lsett.Serialize(binWrt); + + rlRules.Serialize(binWrt, false); + + if (!bSerializeExamples) + { + binWrt.Write(false); // lstExamples == null + binWrt.Write(0); // dictExamples.Count == 0 + } + else + { + if (lstExamples == null) + { + binWrt.Write(false); // lstExamples == null + //save 
dictionary items + int iCount = dictExamples.Count; + binWrt.Write(iCount); + foreach (var kvp in dictExamples) + { + binWrt.Write(kvp.Value.Rule.Signature); + kvp.Value.Serialize(binWrt, false); + } + } + else + { + binWrt.Write(true); // lstExamples != null + //save list & dictionary items + var iCount = lstExamples.Count; + binWrt.Write(iCount); + foreach (var le in lstExamples) + { + binWrt.Write(le.Rule.Signature); + le.Serialize(binWrt, false); + } + } + } + } + public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett) + { + //load metadata + var bThisTopObject = binRead.ReadBoolean(); + //load refernce types if needed ------------------------- + if (bThisTopObject) + this.lsett = new LemmatizerSettings(binRead); + else + this.lsett = lsett; + + rlRules = new RuleList(binRead, this.lsett); + var bCreateLstExamples = binRead.ReadBoolean(); + lstExamples = bCreateLstExamples ? new List() : null; + dictExamples = new Dictionary(); + + //load dictionary items + var iCount = binRead.ReadInt32(); + for (var iId = 0; iId < iCount; iId++) + { + var lrRule = rlRules[binRead.ReadString()]; + var le = new LemmaExample(binRead, this.lsett, lrRule); + dictExamples.Add(le.Signature, le); + if (bCreateLstExamples) + lstExamples.Add(le); + } + } + public ExampleList(BinaryReader binRead, LemmatizerSettings lsett) + { + Deserialize(binRead, lsett); + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + public void Save(Latino.BinarySerializer binWrt, bool bSerializeExamples, bool bThisTopObject) { + //save metadata + binWrt.WriteBool(bThisTopObject); + + //save refernce types if needed ------------------------- + if (bThisTopObject) + lsett.Save(binWrt); + + rlRules.Save(binWrt, false); + + if (!bSerializeExamples) { + binWrt.WriteBool(false); // lstExamples == null + binWrt.WriteInt(0); // dictExamples.Count == 0 + } + else { + if (lstExamples == null) { + binWrt.WriteBool(false); // lstExamples == null + + //save dictionary items + int 
iCount = dictExamples.Count; + binWrt.WriteInt(iCount); + + foreach (KeyValuePair kvp in dictExamples) { + binWrt.WriteString(kvp.Value.Rule.Signature); + kvp.Value.Save(binWrt, false); + } + } + else { + binWrt.WriteBool(true); // lstExamples != null + + //save list & dictionary items + int iCount = lstExamples.Count; + binWrt.WriteInt(iCount); + + foreach (LemmaExample le in lstExamples) { + binWrt.WriteString(le.Rule.Signature); + le.Save(binWrt, false); + } + } + } + + } + public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett) { + //load metadata + bool bThisTopObject = binRead.ReadBool(); + + //load refernce types if needed ------------------------- + if (bThisTopObject) + this.lsett = new LemmatizerSettings(binRead); + else + this.lsett = lsett; + + rlRules = new RuleList(binRead, this.lsett); + + bool bCreateLstExamples = binRead.ReadBool(); + + lstExamples = bCreateLstExamples ? new List() : null; + dictExamples = new Dictionary(); + + //load dictionary items + int iCount = binRead.ReadInt(); + for (int iId = 0; iId < iCount; iId++) { + LemmaRule lrRule = rlRules[binRead.ReadString()]; + LemmaExample le = new LemmaExample(binRead, this.lsett, lrRule); + + dictExamples.Add(le.Signature, le); + if (bCreateLstExamples) lstExamples.Add(le); + } + + } + public ExampleList(Latino.BinarySerializer binRead, LemmatizerSettings lsett) { + Load(binRead, lsett); + } + +#endif + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/LemmaExample.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/LemmaExample.cs new file mode 100644 index 0000000..a5e06ce --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/LemmaExample.cs @@ -0,0 +1,481 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace LemmaSharp +{ + public class LemmaExample : IComparable, IComparer + { 
+ #region Private Variables + private string sWord; + private string sLemma; + private string sSignature; + private string sMsd; + private double dWeight; + private LemmaRule lrRule; + private LemmatizerSettings lsett; + + private string sWordRearCache; + private string sWordFrontCache; + private string sLemmaFrontCache; + #endregion + + #region Constructor(s) + public LemmaExample(string sWord, string sLemma, double dWeight, string sMsd, RuleList rlRules, LemmatizerSettings lsett) + { + this.lsett = lsett; + this.sWord = sWord; + this.sLemma = sLemma; + this.sMsd = sMsd; + this.dWeight = dWeight; + this.lrRule = rlRules.AddRule(this); + switch (lsett.eMsdConsider) + { + case LemmatizerSettings.MsdConsideration.Ignore: + case LemmatizerSettings.MsdConsideration.JoinAll: + case LemmatizerSettings.MsdConsideration.JoinDistinct: + case LemmatizerSettings.MsdConsideration.JoinSameSubstring: + sSignature = string.Format("[{0}]==>[{1}]", sWord, sLemma); + break; + case LemmatizerSettings.MsdConsideration.Distinct: + default: + sSignature = string.Format("[{0}]==>[{1}]({2})", sWord, sLemma, sMsd ?? 
""); + break; + } + this.sWordRearCache = null; + this.sWordFrontCache = null; + this.sLemmaFrontCache = null; + } + #endregion + + #region Public Properties + public string Word + { + get + { + return sWord; + } + } + public string Lemma + { + get + { + return sLemma; + } + } + public string Msd + { + get + { + return sMsd; + } + } + public string Signature + { + get + { + return sSignature; + } + } + public double Weight + { + get + { + return dWeight; + } + } + public LemmaRule Rule + { + get + { + return lrRule; + } + } + /// + /// Word to be pre-lemmatized with Front-Lemmatizer into LemmaFront which is then lemmatized by standard Rear-Lemmatizer (Warning it is reversed) + /// + public string WordFront + { + get + { + if (sWordFrontCache == null) + sWordFrontCache = StringReverse(sWord); + return sWordFrontCache; + } + } + /// + /// Lemma to be produced by pre-lemmatizing with Front-Lemmatizer (Warning it is reversed) + /// + public string LemmaFront + { + get + { + if (sLemmaFrontCache == null) + sLemmaFrontCache = StringReverse(WordRear); + return sLemmaFrontCache; + } + } + /// + /// word to be lemmatized by standard Rear-Lemmatizer (it's beggining has been already modified by Front-Lemmatizer) + /// + public string WordRear + { + get + { + if (sWordRearCache == null) + { + int lemmaPos = 0, wordPos = 0; + var common = LongestCommonSubstring(sWord, sLemma, ref wordPos, ref lemmaPos); + sWordRearCache = lemmaPos == -1 ? 
sLemma : (sLemma.Substring(0, lemmaPos + common.Length) + sWord.Substring(wordPos + common.Length)); + } + return sWordRearCache; + } + } + /// + /// lemma to be produced by standard Rear-Lemmatizer from WordRear + /// + public string LemmaRear + { + get + { + return sLemma; + } + } + #endregion + + #region Essential Class Functions (joining two examples into one) + //TODO - this function is not totaly ok because sMsd should not be + //changed since it could be included in signature + public void Join(LemmaExample leJoin) + { + dWeight += leJoin.dWeight; + if (sMsd != null) + switch (lsett.eMsdConsider) + { + case LemmatizerSettings.MsdConsideration.Ignore: + sMsd = null; + break; + case LemmatizerSettings.MsdConsideration.Distinct: + break; + case LemmatizerSettings.MsdConsideration.JoinAll: + sMsd += "|" + leJoin.sMsd; + break; + case LemmatizerSettings.MsdConsideration.JoinDistinct: + var append = string.Format("|{0}", leJoin.sMsd); + if (false == sMsd.Equals(leJoin.sMsd, StringComparison.Ordinal) && + sMsd.IndexOf(append) < 0) + { + sMsd += append; + } + break; + case LemmatizerSettings.MsdConsideration.JoinSameSubstring: + int iPos = 0; + var iMax = Math.Min(sMsd.Length, leJoin.sMsd.Length); + while (iPos < iMax && sMsd[iPos] == leJoin.sMsd[iPos]) + iPos++; + sMsd = sMsd.Substring(0, iPos); + break; + default: + break; + } + + } + #endregion + + #region Essential Class Functions (calculating similarities betwen examples) + public int Similarity(LemmaExample le) + { + return Similarity(this, le); + } + public static int Similarity(LemmaExample le1, LemmaExample le2) + { + var sWord1 = le1.sWord; + var sWord2 = le2.sWord; + var iLen1 = sWord1.Length; + var iLen2 = sWord2.Length; + var iMaxLen = Math.Min(iLen1, iLen2); + for (var iPos = 1; iPos <= iMaxLen; iPos++) + { + if (sWord1[iLen1 - iPos] != sWord2[iLen2 - iPos]) + return iPos - 1; + } + //TODO similarity should be bigger if two words are totaly equal + //if (sWord1 == sWord2) + // return iMaxLen + 1; + 
//else + return iMaxLen; + } + #endregion + + #region Essential Class Functions (comparing examples - eg.: for sorting) + /// + /// Function used to comprare current MultextExample (ME) against argument ME. + /// Mainly used in for sorting lists of MEs. + /// + /// MultextExample (ME) that we compare current ME against. + /// 1 if current ME is bigger, -1 if smaler and 0 if both are the same. + public int CompareTo(LemmaExample other) + { + var iComparison = CompareStrings(this.sWord, other.sWord, false); + if (iComparison != 0) + return iComparison; + + iComparison = CompareStrings(this.sLemma, other.sLemma, true); + if (iComparison != 0) + return iComparison; + + if (lsett.eMsdConsider == LemmatizerSettings.MsdConsideration.Distinct && + this.sMsd != null && other.sMsd != null) + { + iComparison = CompareStrings(this.sMsd, other.sMsd, true); + if (iComparison != 0) + return iComparison; + } + return 0; + } + + public int Compare(LemmaExample x, LemmaExample y) + { + return x.CompareTo(y); + } + + public static int CompareStrings(string sStr1, string sStr2, bool bForward) + { + var iLen1 = sStr1.Length; + var iLen2 = sStr2.Length; + var iMaxLen = Math.Min(iLen1, iLen2); + if (bForward) + { + for (int iPos = 0; iPos < iMaxLen; iPos++) + { + if (sStr1[iPos] > sStr2[iPos]) + return 1; + if (sStr1[iPos] < sStr2[iPos]) + return -1; + } + } + else + { + for (int iPos = 1; iPos <= iMaxLen; iPos++) + { + if (sStr1[iLen1 - iPos] > sStr2[iLen2 - iPos]) + return 1; + if (sStr1[iLen1 - iPos] < sStr2[iLen2 - iPos]) + return -1; + } + } + if (iLen1 > iLen2) + return 1; + if (iLen1 < iLen2) + return -1; + return 0; + } + + public static int EqualPrifixLen(string sStr1, string sStr2) + { + var iLen1 = sStr1.Length; + var iLen2 = sStr2.Length; + var iMaxLen = Math.Min(iLen1, iLen2); + + for (var iPos = 0; iPos < iMaxLen; iPos++) + { + if (sStr1[iPos] != sStr2[iPos]) + return iPos; + } + return iMaxLen; + } + + public static string LongestCommonSubstring(string sStr1, string sStr2, 
ref int iPosInStr1, ref int iPosInStr2) + { + var l = new int[sStr1.Length + 1, sStr2.Length + 1]; + int z = 0; + string ret = ""; + iPosInStr1 = -1; + iPosInStr2 = -1; + for (var i = 0; i < sStr1.Length; i++) + { + for (var j = 0; j < sStr2.Length; j++) + { + if (sStr1[i] == sStr2[j]) + { + if (i == 0 || j == 0) + { + l[i, j] = 1; + } + else + { + l[i, j] = l[i - 1, j - 1] + 1; + } + if (l[i, j] > z) + { + z = l[i, j]; + iPosInStr1 = i - z + 1; + iPosInStr2 = j - z + 1; + ret = sStr1.Substring(i - z + 1, z); + } + } + } + } + return ret; + } + + public static string StringReverse(string s) + { + if (s == null) return null; + char[] charArray = s.ToCharArray(); + int len = s.Length - 1; + for (int i = 0; i < len; i++, len--) + { + charArray[i] ^= charArray[len]; + charArray[len] ^= charArray[i]; + charArray[i] ^= charArray[len]; + } + return new string(charArray); + } + #endregion + + #region Output Functions (ToString) + public override string ToString() + { + var sb = new StringBuilder(); + if (sWord != null) + sb.AppendFormat("W:\"{0}\" ", sWord); + if (sLemma != null) + sb.AppendFormat("L:\"{0}\" ", sLemma); + if (sMsd != null) + sb.AppendFormat("M:\"{0}\" ", sMsd); + if (false == Double.IsNaN(dWeight)) + sb.AppendFormat("F:\"{0}\" ", dWeight); + if (lrRule != null) + sb.AppendFormat("R:{0} ", lrRule); + if (sb.Length > 0) + return sb.ToString(0, sb.Length - 1); + return string.Empty; + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter binWrt, bool bThisTopObject) + { + //save metadata + binWrt.Write(bThisTopObject); + + //save value types -------------------------------------- + binWrt.Write(sWord); + binWrt.Write(sLemma); + binWrt.Write(sSignature); + if (sMsd == null) + { + binWrt.Write(false); + } + else + { + binWrt.Write(true); + binWrt.Write(sMsd); + } + binWrt.Write(dWeight); + //save refernce types if needed ------------------------- + if (bThisTopObject) + { + lsett.Serialize(binWrt); + 
lrRule.Serialize(binWrt, false); + } + } + public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, LemmaRule lrRule) + { + //load metadata + var bThisTopObject = binRead.ReadBoolean(); + + //load value types -------------------------------------- + sWord = binRead.ReadString(); + sLemma = binRead.ReadString(); + sSignature = binRead.ReadString(); + if (binRead.ReadBoolean()) + sMsd = binRead.ReadString(); + else + sMsd = null; + dWeight = binRead.ReadDouble(); + + //load refernce types if needed ------------------------- + if (bThisTopObject) + { + this.lsett = new LemmatizerSettings(binRead); + this.lrRule = new LemmaRule(binRead, this.lsett); + } + else + { + this.lsett = lsett; + this.lrRule = lrRule; + } + this.sWordRearCache = null; + this.sWordFrontCache = null; + this.sLemmaFrontCache = null; + } + + public LemmaExample(BinaryReader binRead, LemmatizerSettings lsett, LemmaRule lrRule) + { + Deserialize(binRead, lsett, lrRule); + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + + public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject) { + //save metadata + binWrt.WriteBool(bThisTopObject); + + //save value types -------------------------------------- + binWrt.WriteString(sWord); + binWrt.WriteString(sLemma); + binWrt.WriteString(sSignature); + if (sMsd == null) + binWrt.WriteBool(false); + else { + binWrt.WriteBool(true); + binWrt.WriteString(sMsd); + } + binWrt.WriteDouble(dWeight); + + //save refernce types if needed ------------------------- + if (bThisTopObject) { + lsett.Save(binWrt); + lrRule.Save(binWrt, false); + } + } + public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett, LemmaRule lrRule) { + //load metadata + bool bThisTopObject = binRead.ReadBool(); + + //load value types -------------------------------------- + sWord = binRead.ReadString(); + sLemma = binRead.ReadString(); + sSignature = binRead.ReadString(); + if (binRead.ReadBool()) + sMsd = binRead.ReadString(); + 
else + sMsd = null; + dWeight = binRead.ReadDouble(); + + //load refernce types if needed ------------------------- + if (bThisTopObject) { + this.lsett = new LemmatizerSettings(binRead); + this.lrRule = new LemmaRule(binRead, this.lsett); + } + else { + this.lsett = lsett; + this.lrRule = lrRule; + } + + } + public LemmaExample(Latino.BinarySerializer binRead, LemmatizerSettings lsett, LemmaRule lrRule) { + Load(binRead, lsett, lrRule); + } + +#endif + #endregion + } +} + + diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/LemmaRule.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/LemmaRule.cs new file mode 100644 index 0000000..722ea85 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/LemmaRule.cs @@ -0,0 +1,189 @@ +using System; +using System.IO; + +namespace LemmaSharp +{ + public class LemmaRule + { + #region Private Variables + private int iId; + private int iFrom; + private string sFrom; + private string sTo; + private string sSignature; + private LemmatizerSettings lsett; + #endregion + + #region Constructor(s) + public LemmaRule(string sWord, string sLemma, int iId, LemmatizerSettings lsett) + { + this.lsett = lsett; + this.iId = iId; + + int iSameStem = SameStem(sWord, sLemma); + sTo = sLemma.Substring(iSameStem); + iFrom = sWord.Length - iSameStem; + + if (lsett.bUseFromInRules) + { + sFrom = sWord.Substring(iSameStem); + sSignature = string.Format("[{0}]==>[{1}]", sFrom, sTo); + } + else + { + sFrom = null; + sSignature = string.Format("[#{0}]==>[{1}]", iFrom, sTo); + } + } + #endregion + + #region Public Properties + public string Signature + { + get + { + return sSignature; + } + } + public int Id + { + get + { + return iId; + } + } + #endregion + + #region Essential Class Functions + private static int SameStem(string sStr1, string sStr2) + { + var iLen1 = sStr1.Length; + var iLen2 = sStr2.Length; + var iMaxLen = 
Math.Min(iLen1, iLen2); + for (var iPos = 0; iPos < iMaxLen; iPos++) + { + if (sStr1[iPos] != sStr2[iPos]) + return iPos; + } + return iMaxLen; + } + public bool IsApplicableToGroup(int iGroupCondLen) + { + return iGroupCondLen >= iFrom; + } + public string Lemmatize(string sWord) + { + return sWord.Substring(0, sWord.Length - iFrom) + sTo; + } + #endregion + + #region Output Functions (ToString) + public override string ToString() + { + return string.Format("{0}:{1}", iId, sSignature); + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter binWrt, bool bThisTopObject) + { + //save metadata + binWrt.Write(bThisTopObject); + + //save value types -------------------------------------- + binWrt.Write(iId); + binWrt.Write(iFrom); + if (sFrom == null) + binWrt.Write(false); + else + { + binWrt.Write(true); + binWrt.Write(sFrom); + } + binWrt.Write(sTo); + binWrt.Write(sSignature); + + if (bThisTopObject) + lsett.Serialize(binWrt); + } + public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett) + { + //load metadata + var bThisTopObject = binRead.ReadBoolean(); + + //load value types -------------------------------------- + iId = binRead.ReadInt32(); + iFrom = binRead.ReadInt32(); + if (binRead.ReadBoolean()) + { + sFrom = binRead.ReadString(); + } + else + { + sFrom = null; + } + sTo = binRead.ReadString(); + sSignature = binRead.ReadString(); + //load refernce types if needed ------------------------- + if (bThisTopObject) + this.lsett = new LemmatizerSettings(binRead); + else + this.lsett = lsett; + } + + public LemmaRule(System.IO.BinaryReader binRead, LemmatizerSettings lsett) + { + this.Deserialize(binRead, lsett); + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + + public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject) { + //save metadata + binWrt.WriteBool(bThisTopObject); + + //save value types -------------------------------------- + binWrt.WriteInt(iId); + 
binWrt.WriteInt(iFrom); + if (sFrom == null) + binWrt.WriteBool(false); + else { + binWrt.WriteBool(true); + binWrt.WriteString(sFrom); + } + binWrt.WriteString(sTo); + binWrt.WriteString(sSignature); + + if (bThisTopObject) + lsett.Save(binWrt); + } + public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett) { + //load metadata + bool bThisTopObject = binRead.ReadBool(); + + //load value types -------------------------------------- + iId = binRead.ReadInt(); + iFrom = binRead.ReadInt(); + if (binRead.ReadBool()) + sFrom = binRead.ReadString(); + else + sFrom = null; + sTo = binRead.ReadString(); + sSignature = binRead.ReadString(); + + //load refernce types if needed ------------------------- + if (bThisTopObject) + this.lsett = new LemmatizerSettings(binRead); + else + this.lsett = lsett; + } + public LemmaRule(Latino.BinarySerializer binRead, LemmatizerSettings lsett) { + Load(binRead, lsett); + } + +#endif + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/LemmaTreeNode.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/LemmaTreeNode.cs new file mode 100644 index 0000000..7991548 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/LemmaTreeNode.cs @@ -0,0 +1,478 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace LemmaSharp +{ + [Serializable] + public class LemmaTreeNode : ILemmatizerModel + { + #region Private Variables + //settings + private LemmatizerSettings lsett; + //tree structure references + private Dictionary dictSubNodes; + private LemmaTreeNode ltnParentNode; + //essential node properties + private int iSimilarity; //similarity among all words in this node + private string sCondition; //suffix that must match in order to lemmatize + private bool bWholeWord; //true if condition has to match to whole word + //rules and weights; + 
private LemmaRule lrBestRule; //the best rule to be applied when lemmatizing + private RuleWeighted[] aBestRules; //list of best rules + private double dWeight; + //source of this node + private int iStart; + private int iEnd; + private ExampleList elExamples; + #endregion + + #region Constructor(s) & Destructor(s) + private LemmaTreeNode(LemmatizerSettings lsett) + { + this.lsett = lsett; + } + public LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples) + : this(lsett, elExamples, 0, elExamples.Count - 1, null) + { + } + /// + /// + /// + /// + /// + /// Index of the first word of the current group + /// Index of the last word of the current group + /// + private LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples, int iStart, int iEnd, LemmaTreeNode ltnParentNode) : this(lsett) + { + this.ltnParentNode = ltnParentNode; + this.dictSubNodes = null; + this.iStart = iStart; + this.iEnd = iEnd; + this.elExamples = elExamples; + if (iStart >= elExamples.Count || iEnd >= elExamples.Count || iStart > iEnd) + { + lrBestRule = elExamples.Rules.DefaultRule; + aBestRules = new RuleWeighted[1]; + aBestRules[0] = new RuleWeighted(lrBestRule, 0); + dWeight = 0; + return; + } + int iConditionLength = Math.Min(ltnParentNode == null ? 0 : ltnParentNode.iSimilarity + 1, elExamples[iStart].Word.Length); + this.sCondition = elExamples[iStart].Word.Substring(elExamples[iStart].Word.Length - iConditionLength); + this.iSimilarity = elExamples[iStart].Similarity(elExamples[iEnd]); + this.bWholeWord = ltnParentNode == null ? 
false : elExamples[iEnd].Word.Length == ltnParentNode.iSimilarity; + FindBestRules(); + AddSubAll(); + //TODO check this heuristics, can be problematic when there are more applicable rules + if (dictSubNodes != null) + { + var lReplaceNodes = new List>(); + foreach (var kvpChild in dictSubNodes) + if (kvpChild.Value.dictSubNodes != null && kvpChild.Value.dictSubNodes.Count == 1) + { + var enumChildChild = kvpChild.Value.dictSubNodes.Values.GetEnumerator(); + enumChildChild.MoveNext(); + var ltrChildChild = enumChildChild.Current; + if (kvpChild.Value.lrBestRule == lrBestRule) + lReplaceNodes.Add(new KeyValuePair(kvpChild.Key, ltrChildChild)); + } + foreach (var kvpChild in lReplaceNodes) + { + dictSubNodes[kvpChild.Key] = kvpChild.Value; + kvpChild.Value.ltnParentNode = this; + } + } + } + #endregion + + #region Public Properties + public int TreeSize + { + get + { + int iCount = 1; + if (dictSubNodes != null) + { + foreach (var ltnChild in dictSubNodes.Values) + { + iCount += ltnChild.TreeSize; + } + } + return iCount; + } + } + public double Weight + { + get + { + return dWeight; + } + } + + #endregion + + #region Essential Class Functions (building model) + private void FindBestRules() + { + /* + * LINQ SPEED TEST (Slower than current metodology) + * + List leApplicable = new List(); + for (int iExm = iStart; iExm <= iEnd; iExm++) + if (elExamples[iExm].Rule.IsApplicableToGroup(sCondition.Length)) + leApplicable.Add(elExamples[iExm]); + + List> lBestRules = new List>(); + lBestRules.AddRange( + leApplicable. + GroupBy>( + le => le.Rule, + le => le.Weight, + (lr, enumDbl) => new KeyValuePair(lr, enumDbl.Aggregate((acc, curr) => acc + curr)) + ). 
+ OrderBy(kvpLrWght=>kvpLrWght.Value) + ); + + if (lBestRules.Count > 0) + lrBestRule = lBestRules[0].Key; + else { + lrBestRule = elExamples.Rules.DefaultRule; + + } + */ + dWeight = 0; + //calculate dWeight of whole node and calculates qualities for all rules + var dictApplicableRules = new Dictionary(); + //dictApplicableRules.Add(elExamples.Rules.DefaultRule, 0); + while (dictApplicableRules.Count == 0) + { + for (var iExm = iStart; iExm <= iEnd; iExm++) + { + var lr = elExamples[iExm].Rule; + var dExmWeight = elExamples[iExm].Weight; + dWeight += dExmWeight; + if (lr.IsApplicableToGroup(sCondition.Length)) + { + if (dictApplicableRules.ContainsKey(lr)) + dictApplicableRules[lr] += dExmWeight; + else + dictApplicableRules.Add(lr, dExmWeight); + } + } + //if none found then increase condition length or add some default appliable rule + if (dictApplicableRules.Count == 0) + { + if (this.sCondition.Length < iSimilarity) + this.sCondition = elExamples[iStart].Word.Substring(elExamples[iStart].Word.Length - (sCondition.Length + 1)); + else + //TODO preveri hevristiko, mogoce je bolje ce se doda default rule namesto rulea od starsa + dictApplicableRules.Add(ltnParentNode.lrBestRule, 0); + } + } + //TODO can optimize this step using sorted list (dont add if it's worse than the worst) + var lSortedRules = new List(); + foreach (var kvp in dictApplicableRules) + { + lSortedRules.Add(new RuleWeighted(kvp.Key, kvp.Value / dWeight)); + } + lSortedRules.Sort(); + + //keep just best iMaxRulesPerNode rules + var iNumRules = lSortedRules.Count; + if (lsett.iMaxRulesPerNode > 0) + iNumRules = Math.Min(lSortedRules.Count, lsett.iMaxRulesPerNode); + + aBestRules = new RuleWeighted[iNumRules]; + for (var iRule = 0; iRule < iNumRules; iRule++) + { + aBestRules[iRule] = lSortedRules[iRule]; + } + + //set best rule + lrBestRule = aBestRules[0].Rule; + + //TODO must check if this hevristics is OK (to privilige parent rule) + if (ltnParentNode != null) + { + for (int iRule = 0; iRule < 
lSortedRules.Count && + lSortedRules[iRule].Weight == lSortedRules[0].Weight; iRule++) + { + if (lSortedRules[iRule].Rule == ltnParentNode.lrBestRule) + { + lrBestRule = lSortedRules[iRule].Rule; + break; + } + } + } + } + + private void AddSubAll() + { + int iStartGroup = iStart; + var chCharPrev = '\0'; + var bSubGroupNeeded = false; + for (var iWrd = iStart; iWrd <= iEnd; iWrd++) + { + var sWord = elExamples[iWrd].Word; + var chCharThis = sWord.Length > iSimilarity ? sWord[sWord.Length - 1 - iSimilarity] : '\0'; + if (iWrd != iStart && chCharPrev != chCharThis) + { + if (bSubGroupNeeded) + { + AddSub(iStartGroup, iWrd - 1, chCharPrev); + bSubGroupNeeded = false; + } + iStartGroup = iWrd; + } + + //TODO check out bSubGroupNeeded when there are multiple posible rules (not just lrBestRule) + if (elExamples[iWrd].Rule != lrBestRule) + { + bSubGroupNeeded = true; + } + chCharPrev = chCharThis; + } + if (bSubGroupNeeded && iStartGroup != iStart) + { + AddSub(iStartGroup, iEnd, chCharPrev); + } + } + + private void AddSub(int iStart, int iEnd, char chChar) + { + var ltnSub = new LemmaTreeNode(lsett, elExamples, iStart, iEnd, this); + + //TODO - maybe not realy appropriate because loosing statisitcs from multiple possible rules + if (ltnSub.lrBestRule == lrBestRule && ltnSub.dictSubNodes == null) + return; + + if (dictSubNodes == null) + dictSubNodes = new Dictionary(); + dictSubNodes.Add(chChar, ltnSub); + } + #endregion + + #region Essential Class Functions (running model = lemmatizing) + public bool ConditionSatisfied(string sWord) + { + //if (bWholeWord) + // return sWord == sCondition; + //else + // return sWord.EndsWith(sCondition); + + var iDiff = sWord.Length - sCondition.Length; + if (iDiff < 0 || (bWholeWord && iDiff > 0)) + return false; + + var iWrdEnd = sCondition.Length - ltnParentNode.sCondition.Length - 1; + for (var iChar = 0; iChar < iWrdEnd; iChar++) + { + if (sCondition[iChar] != sWord[iChar + iDiff]) + return false; + } + return true; + } + public 
string Lemmatize(string sWord) + { + if (sWord.Length >= iSimilarity && dictSubNodes != null) + { + char chChar = sWord.Length > iSimilarity ? sWord[sWord.Length - 1 - iSimilarity] : '\0'; + if (dictSubNodes.ContainsKey(chChar) && dictSubNodes[chChar].ConditionSatisfied(sWord)) + return dictSubNodes[chChar].Lemmatize(sWord); + } + return lrBestRule.Lemmatize(sWord); + } + + #endregion + + #region Output Functions (ToString) + public override string ToString() + { + var sb = new StringBuilder(); + ToString(sb, 0); + return sb.ToString(); + } + + private void ToString(StringBuilder sb, int iLevel) + { + sb.Append(new string('\t', iLevel)); + sb.AppendFormat("Suffix=\"{0}{1}\"; ", bWholeWord ? "^" : string.Empty, sCondition); + sb.AppendFormat("Rule=\"{0}\"; ", lrBestRule); + sb.AppendFormat("Weight=\"{0}\"; ", dWeight); + if (aBestRules != null && aBestRules.Length > 0) + sb.AppendFormat("Cover={0}; ", aBestRules[0].Weight); + sb.Append("Rulles="); + if (aBestRules != null) + { + foreach (var rw in aBestRules) + sb.AppendFormat(" {0}", rw); + } + sb.Append("; "); + sb.AppendLine(); + if (dictSubNodes != null) + { + foreach (var ltnChild in dictSubNodes.Values) + { + ltnChild.ToString(sb, iLevel + 1); + } + } + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter binWrt) + { + binWrt.Write(dictSubNodes != null); + if (dictSubNodes != null) + { + binWrt.Write(dictSubNodes.Count); + foreach (var kvp in dictSubNodes) + { + binWrt.Write(kvp.Key); + kvp.Value.Serialize(binWrt); + } + } + binWrt.Write(iSimilarity); + binWrt.Write(sCondition); + binWrt.Write(bWholeWord); + binWrt.Write(lrBestRule.Signature); + binWrt.Write(aBestRules.Length); + for (var i = 0; i < aBestRules.Length; i++) + { + binWrt.Write(aBestRules[i].Rule.Signature); + binWrt.Write(aBestRules[i].Weight); + } + binWrt.Write(dWeight); + binWrt.Write(iStart); + binWrt.Write(iEnd); + } + + public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, 
ExampleList elExamples, LemmaTreeNode ltnParentNode) + { + this.lsett = lsett; + if (binRead.ReadBoolean()) + { + dictSubNodes = new Dictionary(); + var iCount = binRead.ReadInt32(); + for (var i = 0; i < iCount; i++) + { + var cKey = binRead.ReadChar(); + var ltrSub = new LemmaTreeNode(binRead, this.lsett, elExamples, this); + dictSubNodes.Add(cKey, ltrSub); + } + } + else + { + dictSubNodes = null; + } + this.ltnParentNode = ltnParentNode; + + iSimilarity = binRead.ReadInt32(); + sCondition = binRead.ReadString(); + bWholeWord = binRead.ReadBoolean(); + lrBestRule = elExamples.Rules[binRead.ReadString()]; + + var iCountBest = binRead.ReadInt32(); + aBestRules = new RuleWeighted[iCountBest]; + for (var i = 0; i < iCountBest; i++) + { + aBestRules[i] = + new RuleWeighted(elExamples.Rules[binRead.ReadString()], binRead.ReadDouble()); + } + dWeight = binRead.ReadDouble(); + iStart = binRead.ReadInt32(); + iEnd = binRead.ReadInt32(); + this.elExamples = elExamples; + } + public LemmaTreeNode(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode) + { + Deserialize(binRead, lsett, elExamples, ltnParentNode); + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + public void Save(Latino.BinarySerializer binWrt) { + binWrt.WriteBool(dictSubNodes != null); + if (dictSubNodes != null) { + binWrt.WriteInt(dictSubNodes.Count); + foreach (KeyValuePair kvp in dictSubNodes) { + binWrt.WriteChar(kvp.Key); + kvp.Value.Save(binWrt); + } + } + + binWrt.WriteInt(iSimilarity); + binWrt.WriteString(sCondition); + binWrt.WriteBool(bWholeWord); + + binWrt.WriteString(lrBestRule.Signature); + binWrt.WriteInt(aBestRules.Length); + for (int i = 0; i < aBestRules.Length; i++) { + binWrt.WriteString(aBestRules[i].Rule.Signature); + binWrt.WriteDouble(aBestRules[i].Weight); + } + binWrt.WriteDouble(dWeight); + + binWrt.WriteInt(iStart); + binWrt.WriteInt(iEnd); + } + public void Load(Latino.BinarySerializer binRead, 
LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode) { + this.lsett = lsett; + + if (binRead.ReadBool()) { + dictSubNodes = new Dictionary(); + int iCount = binRead.ReadInt(); + for (int i = 0; i < iCount; i++) { + char cKey = binRead.ReadChar(); + LemmaTreeNode ltrSub = new LemmaTreeNode(binRead, this.lsett, elExamples, this); + dictSubNodes.Add(cKey, ltrSub); + } + } + else + dictSubNodes = null; + + this.ltnParentNode = ltnParentNode; + + iSimilarity = binRead.ReadInt(); + sCondition = binRead.ReadString(); + bWholeWord = binRead.ReadBool(); + + lrBestRule = elExamples.Rules[binRead.ReadString()]; + + int iCountBest = binRead.ReadInt(); + aBestRules = new RuleWeighted[iCountBest]; + for (int i = 0; i < iCountBest; i++) + aBestRules[i] = new RuleWeighted(elExamples.Rules[binRead.ReadString()], binRead.ReadDouble()); + + dWeight = binRead.ReadDouble(); + + iStart = binRead.ReadInt(); + iEnd = binRead.ReadInt(); + this.elExamples = elExamples; + + } + public LemmaTreeNode(Latino.BinarySerializer binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode) { + Load(binRead, lsett, elExamples, ltnParentNode); + } +#endif + #endregion + + #region Other (Temporarly) + //TODO - this is temp function, remove it + public bool CheckConsistency() + { + var bReturn = true; + if (dictSubNodes != null) + foreach (var ltnChild in dictSubNodes.Values) + bReturn = bReturn && + ltnChild.CheckConsistency() && + ltnChild.sCondition.EndsWith(sCondition); + return bReturn; + } + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/Lemmatizer.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/Lemmatizer.cs new file mode 100644 index 0000000..b63632a --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/Lemmatizer.cs @@ -0,0 +1,465 @@ +using System; +using System.Collections.Generic; +using 
System.Text; +using System.IO; +using System.Runtime.Serialization; +using System.IO.Compression; +using SevenZip; + +namespace LemmaSharp +{ + [Serializable] + public class Lemmatizer : ITrainableLemmatizer +#if LATINO + , Latino.ISerializable +#endif + { + + #region Private Variables + protected LemmatizerSettings lsett; + protected ExampleList elExamples; + protected LemmaTreeNode ltnRootNode; + protected LemmaTreeNode ltnRootNodeFront; + #endregion + + #region Constructor(s) + public Lemmatizer() : + this(new LemmatizerSettings()) + { } + + public Lemmatizer(LemmatizerSettings lsett) + { + this.lsett = lsett; + this.elExamples = new ExampleList(lsett); + this.ltnRootNode = null; + this.ltnRootNodeFront = null; + } + + public Lemmatizer(StreamReader srIn, string sFormat, LemmatizerSettings lsett) : this(lsett) + { + AddMultextFile(srIn, sFormat); + } + #endregion + + #region Private Properties + private LemmaTreeNode ltrRootNodeSafe + { + get + { + if (ltnRootNode == null) + BuildModel(); + return ltnRootNode; + } + } + private LemmaTreeNode ltrRootNodeFrontSafe + { + get + { + if (ltnRootNodeFront == null && lsett.bBuildFrontLemmatizer) + BuildModel(); + return ltnRootNodeFront; + } + } + #endregion + + #region Public Properties + public LemmatizerSettings Settings + { + get + { + return lsett.CloneDeep(); + } + } + public ExampleList Examples + { + get + { + return elExamples; + } + } + public RuleList Rules + { + get + { + return elExamples.Rules; + } + } + public LemmaTreeNode RootNode + { + get + { + return ltrRootNodeSafe; + } + } + public LemmaTreeNode RootNodeFront + { + get + { + return ltrRootNodeFrontSafe; + } + } + public ILemmatizerModel Model + { + get + { + return ltrRootNodeSafe; + } + } + #endregion + + #region Essential Class Functions (adding examples to repository) + public void AddMultextFile(StreamReader srIn, string sFormat) + { + this.elExamples.AddMultextFile(srIn, sFormat); + ltnRootNode = null; + } + public void AddExample(string 
sWord, string sLemma) + { + AddExample(sWord, sLemma, 1, null); + } + public void AddExample(string sWord, string sLemma, double dWeight) + { + AddExample(sWord, sLemma, dWeight, null); + } + public void AddExample(string sWord, string sLemma, double dWeight, string sMsd) + { + elExamples.AddExample(sWord, sLemma, dWeight, sMsd); + ltnRootNode = null; + } + public void DropExamples() + { + elExamples.DropExamples(); + } + public void FinalizeAdditions() + { + elExamples.FinalizeAdditions(); + } + #endregion + + #region Essential Class Functions (building model & lemmatizing) + public void BuildModel() + { + if (ltnRootNode != null) + return; + + if (!lsett.bBuildFrontLemmatizer) + { + //TODO remove: elExamples.FinalizeAdditions(); + elExamples.FinalizeAdditions(); + ltnRootNode = new LemmaTreeNode(lsett, elExamples); + } + else + { + ltnRootNode = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(false)); + ltnRootNodeFront = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(true)); + } + } + + public string Lemmatize(string sWord) + { + if (!lsett.bBuildFrontLemmatizer) + { + return ltrRootNodeSafe.Lemmatize(sWord); + } + var sWordFront = LemmaExample.StringReverse(sWord); + var sLemmaFront = ltrRootNodeFrontSafe.Lemmatize(sWordFront); + var sWordRear = LemmaExample.StringReverse(sLemmaFront); + return ltrRootNodeSafe.Lemmatize(sWordRear); + } + #endregion + + #region Serialization Functions (ISerializable) + public void GetObjectData(SerializationInfo info, StreamingContext context) + { + info.AddValue("lsett", lsett); + info.AddValue("elExamples", elExamples); + } + + public Lemmatizer(SerializationInfo info, StreamingContext context) : this() + { + lsett = (LemmatizerSettings)info.GetValue("lsett", typeof(LemmatizerSettings)); + elExamples = (ExampleList)info.GetValue("elExamples", typeof(ExampleList)); + this.BuildModel(); + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter binWrt, bool 
bSerializeExamples) + { + lsett.Serialize(binWrt); + binWrt.Write(bSerializeExamples); + elExamples.Serialize(binWrt, bSerializeExamples, false); + + if (!bSerializeExamples) + { + elExamples.GetFrontRearExampleList(false).Serialize(binWrt, bSerializeExamples, false); + elExamples.GetFrontRearExampleList(true).Serialize(binWrt, bSerializeExamples, false); + } + ltnRootNode.Serialize(binWrt); + if (lsett.bBuildFrontLemmatizer) + ltnRootNodeFront.Serialize(binWrt); + } + + public void Deserialize(BinaryReader binRead) + { + lsett = new LemmatizerSettings(binRead); + + var bSerializeExamples = binRead.ReadBoolean(); + elExamples = new ExampleList(binRead, lsett); + + ExampleList elExamplesRear; + ExampleList elExamplesFront; + + if (bSerializeExamples) + { + elExamplesRear = elExamples.GetFrontRearExampleList(false); + elExamplesFront = elExamples.GetFrontRearExampleList(true); + } + else + { + elExamplesRear = new ExampleList(binRead, lsett); + elExamplesFront = new ExampleList(binRead, lsett); + } + + if (!lsett.bBuildFrontLemmatizer) + { + ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null); + } + else + { + ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamplesRear, null); + ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, elExamplesFront, null); + } + } + + //Do not change the order!!! (If new compression algorithms are added, otherwise you will not be able to load old files.) 
+ public enum Compression + { + None, + Deflate, + LZMA + } + + public Lemmatizer(BinaryReader binRead) + { + var compr = (Compression)binRead.ReadByte(); + if (compr == Compression.None) + Deserialize(binRead); + else + throw new Exception("Loading lemmatizer with binary reader on uncompressed stream is not supported."); + } + + public Lemmatizer(Stream streamIn) + { + Deserialize(streamIn); + } + + public void Serialize(Stream streamOut) + { + Serialize(streamOut, true, Compression.None); + } + public void Serialize(Stream streamOut, bool bSerializeExamples) + { + Serialize(streamOut, bSerializeExamples, Compression.None); + } + public void Serialize(Stream streamOut, bool bSerializeExamples, Compression compress) + { + streamOut.WriteByte((byte)compress); + switch (compress) + { + case Compression.None: + SerializeNone(streamOut, bSerializeExamples); + break; + case Compression.Deflate: + SerializeDeflate(streamOut, bSerializeExamples); + break; + case Compression.LZMA: + SerializeLZMA(streamOut, bSerializeExamples); + break; + default: + break; + } + } + + private void SerializeNone(Stream streamOut, bool bSerializeExamples) + { + using (var binWrt = new BinaryWriter(streamOut)) + { + this.Serialize(binWrt, bSerializeExamples); + } + } + + private void SerializeDeflate(Stream streamOut, bool bSerializeExamples) + { + using (var streamOutNew = new DeflateStream(streamOut, CompressionMode.Compress, true)) + { + using (var binWrt = new BinaryWriter(streamOutNew)) + { + this.Serialize(binWrt, bSerializeExamples); + binWrt.Flush(); + binWrt.Close(); + } + } + } + + private void SerializeLZMA(Stream streamOut, bool bSerializeExamples) + { + CoderPropID[] propIDs = + { + CoderPropID.DictionarySize, + CoderPropID.PosStateBits, + CoderPropID.LitContextBits, + CoderPropID.LitPosBits, + CoderPropID.Algorithm, + CoderPropID.NumFastBytes, + CoderPropID.MatchFinder, + CoderPropID.EndMarker + }; + + Int32 dictionary = 1 << 23; + Int32 posStateBits = 2; + Int32 litContextBits 
= 3; // for normal files + Int32 litPosBits = 0; + Int32 algorithm = 2; + Int32 numFastBytes = 128; + var mf = "bt4"; + var eos = false; + + object[] properties = + { + (Int32)(dictionary), + (Int32)(posStateBits), + (Int32)(litContextBits), + (Int32)(litPosBits), + (Int32)(algorithm), + (Int32)(numFastBytes), + mf, + eos + }; + + using (var msTemp = new MemoryStream()) + { + using (var binWrtTemp = new BinaryWriter(msTemp)) + { + this.Serialize(binWrtTemp, bSerializeExamples); + msTemp.Position = 0; + var encoder = new SevenZip.Compression.LZMA.Encoder(); + encoder.SetCoderProperties(propIDs, properties); + encoder.WriteCoderProperties(streamOut); + var fileSize = msTemp.Length; + for (int i = 0; i < 8; i++) + { + streamOut.WriteByte((Byte)(fileSize >> (8 * i))); + } + encoder.Code(msTemp, streamOut, -1, -1, null); + binWrtTemp.Close(); + encoder = null; + } + msTemp.Close(); + } + } + + public void Deserialize(Stream streamIn) + { + var compr = (Compression)streamIn.ReadByte(); + using (var streamInNew = Decompress(streamIn, compr)) + { + using (var br = new BinaryReader(streamInNew)) + { + Deserialize(br); + } + } + } + + private Stream Decompress(Stream streamIn, Compression compress) + { + Stream streamInNew; + switch (compress) + { + case Compression.None: + default: + streamInNew = streamIn; + break; + case Compression.Deflate: + streamInNew = new DeflateStream(streamIn, CompressionMode.Decompress); + break; + case Compression.LZMA: + streamInNew = DecompressLZMA(streamIn); + break; + } + return streamInNew; + } + + private Stream DecompressLZMA(Stream streamIn) + { + var properties = new byte[5]; + if (streamIn.Read(properties, 0, 5) != 5) + throw new Exception("input .lzma is too short"); + var decoder = new SevenZip.Compression.LZMA.Decoder(); + decoder.SetDecoderProperties(properties); + + long outSize = 0; + for (var i = 0; i < 8; i++) + { + var v = streamIn.ReadByte(); + if (v < 0) + throw (new Exception("Can't Read 1")); + outSize |= ((long)(byte)v) 
<< (8 * i); + } + var compressedSize = streamIn.Length - streamIn.Position; + var outStream = new MemoryStream(); + decoder.Code(streamIn, outStream, compressedSize, outSize, null); + outStream.Seek(0, 0); + decoder = null; + return outStream; + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + + public void Save(Latino.BinarySerializer binWrt) { + lsett.Save(binWrt); + + elExamples.Save(binWrt, true, false); + + ltnRootNode.Save(binWrt); + if (lsett.bBuildFrontLemmatizer) + ltnRootNodeFront.Save(binWrt); + } + + public void Load(Latino.BinarySerializer binRead) { + lsett = new LemmatizerSettings(binRead); + elExamples = new ExampleList(binRead, lsett); + if (!lsett.bBuildFrontLemmatizer) { + ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null); + } + else { + ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples.GetFrontRearExampleList(false) , null); + ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, elExamples.GetFrontRearExampleList(true), null); + } + } + + public Lemmatizer(Latino.BinarySerializer binRead) { + Load(binRead); + } + + public void Save(Stream streamOut) { + Latino.BinarySerializer binWrt = new Latino.BinarySerializer(streamOut); + this.Save(binWrt); + binWrt.Close(); + } + public void Load(Stream streamIn) { + Latino.BinarySerializer binRead = new Latino.BinarySerializer(streamIn); + Load(binRead); + binRead.Close(); + } + + public Lemmatizer(Stream streamIn, string sDummy) { + Load(streamIn); + } + +#endif + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/LemmatizerSettings.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/LemmatizerSettings.cs new file mode 100644 index 0000000..04ed7a0 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/LemmatizerSettings.cs @@ -0,0 +1,143 @@ +using System; +using System.IO; +using System.Runtime.Serialization; + 
+namespace LemmaSharp +{ + /// + /// These are the lemmagen algorithm settings that affect speed/power of the learning and lemmatizing algorithm. + /// TODO this class will be probbably removed in the future. + /// + [Serializable] + public class LemmatizerSettings : ISerializable + { + #region Constructor(s) + public LemmatizerSettings() + { + } + #endregion + + #region Sub-Structures + /// + /// How algorithm considers msd tags. + /// + public enum MsdConsideration + { + /// + /// Completely ignores mds tags (join examples with different tags and sum their weihgts). + /// + Ignore, + /// + /// Same examples with different msd's are not considered equal and joined. + /// + Distinct, + /// + /// Joins examples with different tags (concatenates all msd tags). + /// + JoinAll, + /// + /// Joins examples with different tags (concatenates just distinct msd tags - somehow slower). + /// + JoinDistinct, + /// + /// Joins examples with different tags (new tag is the left to right substring that all joined examples share). + /// + JoinSameSubstring + } + #endregion + + #region Public Variables + /// + /// True if from string should be included in rule identifier ([from]->[to]). False if just length of from string is used ([#len]->[to]). + /// + public bool bUseFromInRules = true; + /// + /// Specification how algorithm considers msd tags. + /// + public MsdConsideration eMsdConsider = MsdConsideration.Distinct; + /// + /// How many of the best rules are kept in memory for each node. Zero means unlimited. 
+ /// + public int iMaxRulesPerNode = 0; + /// + /// If true, than build proccess uses few more hevristics to build first left to right lemmatizer (lemmatizes front of the word) + /// + public bool bBuildFrontLemmatizer = false; + #endregion + + #region Cloneable functions + public LemmatizerSettings CloneDeep() + { + return new LemmatizerSettings() + { + bUseFromInRules = this.bUseFromInRules, + eMsdConsider = this.eMsdConsider, + iMaxRulesPerNode = this.iMaxRulesPerNode, + bBuildFrontLemmatizer = this.bBuildFrontLemmatizer + }; + } + #endregion + + #region Serialization Functions (ISerializable) + public void GetObjectData(SerializationInfo info, StreamingContext context) + { + info.AddValue("bUseFromInRules", bUseFromInRules); + info.AddValue("eMsdConsider", eMsdConsider); + info.AddValue("iMaxRulesPerNode", iMaxRulesPerNode); + info.AddValue("bBuildFrontLemmatizer", bBuildFrontLemmatizer); + } + public LemmatizerSettings(SerializationInfo info, StreamingContext context) + { + bUseFromInRules = info.GetBoolean("bUseFromInRules"); + eMsdConsider = (MsdConsideration)info.GetValue("eMsdConsider", typeof(MsdConsideration)); + iMaxRulesPerNode = info.GetInt32("iMaxRulesPerNode"); + bBuildFrontLemmatizer = info.GetBoolean("bBuildFrontLemmatizer"); + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter binWrt) + { + binWrt.Write(bUseFromInRules); + binWrt.Write((int)eMsdConsider); + binWrt.Write(iMaxRulesPerNode); + binWrt.Write(bBuildFrontLemmatizer); + } + public void Deserialize(BinaryReader binRead) + { + bUseFromInRules = binRead.ReadBoolean(); + eMsdConsider = (MsdConsideration)binRead.ReadInt32(); + iMaxRulesPerNode = binRead.ReadInt32(); + bBuildFrontLemmatizer = binRead.ReadBoolean(); + } + public LemmatizerSettings(System.IO.BinaryReader binRead) + { + this.Deserialize(binRead); + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + + public void Save(Latino.BinarySerializer binWrt) { + 
binWrt.WriteBool(bUseFromInRules); + binWrt.WriteInt((int)eMsdConsider); + binWrt.WriteInt(iMaxRulesPerNode); + binWrt.WriteBool(bBuildFrontLemmatizer); + } + + public void Load(Latino.BinarySerializer binRead) { + bUseFromInRules = binRead.ReadBool(); + eMsdConsider = (MsdConsideration)binRead.ReadInt(); + iMaxRulesPerNode = binRead.ReadInt(); + bBuildFrontLemmatizer = binRead.ReadBool(); + } + + public LemmatizerSettings(Latino.BinarySerializer reader) { + Load(reader); + } + +#endif + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/RuleList.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/RuleList.cs new file mode 100644 index 0000000..fbaad6f --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/RuleList.cs @@ -0,0 +1,161 @@ +using System.Collections.Generic; +using System.IO; + +namespace LemmaSharp +{ + public class RuleList : Dictionary + { + #region Private Variables + private LemmatizerSettings lsett; + private LemmaRule lrDefaultRule; + #endregion + + #region Constructor(s) + public RuleList(LemmatizerSettings lsett) + { + this.lsett = lsett; + lrDefaultRule = AddRule(new LemmaRule("", "", 0, lsett)); + } + #endregion + + #region Public Properties + public LemmaRule DefaultRule + { + get + { + return lrDefaultRule; + } + } + #endregion + + #region Essential Class Functions + public LemmaRule AddRule(LemmaExample le) + { + return AddRule(new LemmaRule(le.Word, le.Lemma, this.Count, lsett)); + } + private LemmaRule AddRule(LemmaRule lrRuleNew) + { + LemmaRule lrRuleReturn = null; + if (!this.TryGetValue(lrRuleNew.Signature, out lrRuleReturn)) + { + lrRuleReturn = lrRuleNew; + this.Add(lrRuleReturn.Signature, lrRuleReturn); + } + return lrRuleReturn; + } + #endregion + + #region Serialization Functions (Binary) + public void Serialize(BinaryWriter binWrt, bool bThisTopObject) + { + //save metadata + 
binWrt.Write(bThisTopObject); + + //save value types -------------------------------------- + + //save refernce types if needed ------------------------- + if (bThisTopObject) + lsett.Serialize(binWrt); + + //save list items --------------------------------------- + var iCount = this.Count; + binWrt.Write(iCount); + foreach (var kvp in this) + { + binWrt.Write(kvp.Key); + kvp.Value.Serialize(binWrt, false); + } + //default rule is already saved in the list. Here just save its id. + binWrt.Write(lrDefaultRule.Signature); + } + + public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett) + { + //load metadata + var bThisTopObject = binRead.ReadBoolean(); + + //load value types -------------------------------------- + + //load refernce types if needed ------------------------- + if (bThisTopObject) + this.lsett = new LemmatizerSettings(binRead); + else + this.lsett = lsett; + + //load list items --------------------------------------- + this.Clear(); + int iCount = binRead.ReadInt32(); + for (var iId = 0; iId < iCount; iId++) + { + var sKey = binRead.ReadString(); + var lrVal = new LemmaRule(binRead, this.lsett); + this.Add(sKey, lrVal); + } + + //link the default rule just Id was saved. 
+ lrDefaultRule = this[binRead.ReadString()]; + } + + public RuleList(System.IO.BinaryReader binRead, LemmatizerSettings lsett) + { + this.Deserialize(binRead, lsett); + } + #endregion + + #region Serialization Functions (Latino) +#if LATINO + + public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject) { + //save metadata + binWrt.WriteBool(bThisTopObject); + + //save value types -------------------------------------- + + //save refernce types if needed ------------------------- + if (bThisTopObject) + lsett.Save(binWrt); + + //save list items --------------------------------------- + int iCount = this.Count; + binWrt.WriteInt(iCount); + foreach (KeyValuePair kvp in this) { + binWrt.WriteString(kvp.Key); + kvp.Value.Save(binWrt, false); + } + + //default rule is already saved in the list. Here just save its id. + binWrt.WriteString(lrDefaultRule.Signature); + } + public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett) { + //load metadata + bool bThisTopObject = binRead.ReadBool(); + + //load value types -------------------------------------- + + //load refernce types if needed ------------------------- + if (bThisTopObject) + this.lsett = new LemmatizerSettings(binRead); + else + this.lsett = lsett; + + //load list items --------------------------------------- + this.Clear(); + int iCount = binRead.ReadInt(); + for (int iId = 0; iId < iCount; iId++) { + string sKey = binRead.ReadString(); + LemmaRule lrVal = new LemmaRule(binRead, this.lsett); + this.Add(sKey, lrVal); + } + + //link the default rule just Id was saved. 
+ lrDefaultRule = this[binRead.ReadString()]; + + } + public RuleList(Latino.BinarySerializer binRead, LemmatizerSettings lsett) { + Load(binRead, lsett); + } + +#endif + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/RuleWeighted.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/RuleWeighted.cs new file mode 100644 index 0000000..c4d332b --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Classes/RuleWeighted.cs @@ -0,0 +1,50 @@ +using System; + +namespace LemmaSharp +{ + [Serializable] + class RuleWeighted : IComparable + { + #region Private Variables + private LemmaRule lrRule; + private double dWeight; + #endregion + + #region Constructor(s) + public RuleWeighted(LemmaRule lrRule, double dWeight) + { + this.lrRule = lrRule; + this.dWeight = dWeight; + } + #endregion + + #region Public Properties + public LemmaRule Rule + { + get { return lrRule; } + } + public double Weight + { + get { return dWeight; } + } + #endregion + + #region Essential Class Functions (comparing objects, eg.: for sorting) + public int CompareTo(RuleWeighted rl) + { + if (this.dWeight < rl.dWeight) return 1; + if (this.dWeight > rl.dWeight) return -1; + if (this.lrRule.Id < rl.lrRule.Id) return 1; + if (this.lrRule.Id > rl.lrRule.Id) return -1; + return 0; + } + #endregion + + #region Output & Serialization Functions + public override string ToString() + { + return string.Format("{0}{1:(0.00%)}", lrRule, dWeight); + } + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/ExternalLibs/7zipSources.7z b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/ExternalLibs/7zipSources.7z new file mode 100644 index 0000000..5463ceb Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/ExternalLibs/7zipSources.7z differ diff --git 
a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/ExternalLibs/Lzma#.dll b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/ExternalLibs/Lzma#.dll new file mode 100644 index 0000000..2bb9990 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/ExternalLibs/Lzma#.dll differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Interfaces/ILemmatizer.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Interfaces/ILemmatizer.cs new file mode 100644 index 0000000..50c13a0 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Interfaces/ILemmatizer.cs @@ -0,0 +1,9 @@ +using System.Runtime.Serialization; + +namespace LemmaSharp +{ + public interface ILemmatizer : ISerializable + { + string Lemmatize(string sWord); + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Interfaces/ILemmatizerModel.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Interfaces/ILemmatizerModel.cs new file mode 100644 index 0000000..367203c --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Interfaces/ILemmatizerModel.cs @@ -0,0 +1,8 @@ +namespace LemmaSharp +{ + public interface ILemmatizerModel + { + string Lemmatize(string sWord); + string ToString(); + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Interfaces/ILemmatizerTrainable.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Interfaces/ILemmatizerTrainable.cs new file mode 100644 index 0000000..cfd18ab --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/Interfaces/ILemmatizerTrainable.cs @@ -0,0 +1,12 @@ +namespace LemmaSharp +{ + public interface ITrainableLemmatizer : ILemmatizer + { + ExampleList Examples { get; 
} + ILemmatizerModel Model { get; } + void AddExample(string sWord, string sLemma); + void AddExample(string sWord, string sLemma, double dWeight); + void AddExample(string sWord, string sLemma, double dWeight, string sMsd); + void BuildModel(); + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/LatinoCompatibility/BinarySerializer.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/LatinoCompatibility/BinarySerializer.cs new file mode 100644 index 0000000..74effcb --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/LatinoCompatibility/BinarySerializer.cs @@ -0,0 +1,539 @@ +/*==========================================================================; + * + * (c) 2004-08 JSI. All rights reserved. + * + * File: BinarySerializer.cs + * Version: 1.0 + * Desc: Binary serializer + * Author: Miha Grcar + * Created on: Oct-2004 + * Last modified: May-2008 + * Revision: May-2008 + * + ***************************************************************************/ + +//Remark: Use this file as Latino compatibility checker. When it is included in +// the project it defines symbol LATINO, that should enable all Latino specific +// serialization functions. When excluded, this code will not be created and also +// following Latino namspace will not be added to the project. 
+ + +using System; +using System.Runtime.InteropServices; +using System.Collections.Generic; +using System.Reflection; +using System.Text; +using System.IO; + +#if LATINO + +namespace Latino +{ + /* .----------------------------------------------------------------------- + | + | Class BinarySerializer + | + '----------------------------------------------------------------------- + */ + public interface ISerializable { + // *** note that you need to implement a constructor that loads the instance if the class implements Latino.ISerializable + void Save(Latino.BinarySerializer writer); + } + + public class BinarySerializer + { + private static Dictionary m_full_to_short_type_name + = new Dictionary(); + private static Dictionary m_short_to_full_type_name + = new Dictionary(); + private Stream m_stream; + private string m_data_dir + = "."; + private static void RegisterTypeName(string full_type_name, string short_type_name) + { + m_full_to_short_type_name.Add(full_type_name, short_type_name); + m_short_to_full_type_name.Add(short_type_name, full_type_name); + } + private static string GetFullTypeName(string short_type_name) + { + return m_short_to_full_type_name.ContainsKey(short_type_name) ? m_short_to_full_type_name[short_type_name] : short_type_name; + } + private static string GetShortTypeName(string full_type_name) + { + return m_full_to_short_type_name.ContainsKey(full_type_name) ? 
m_full_to_short_type_name[full_type_name] : full_type_name; + } + static BinarySerializer() + { + RegisterTypeName(typeof(bool).AssemblyQualifiedName, "b"); + RegisterTypeName(typeof(byte).AssemblyQualifiedName, "ui1"); + RegisterTypeName(typeof(sbyte).AssemblyQualifiedName, "i1"); + RegisterTypeName(typeof(char).AssemblyQualifiedName, "c"); + RegisterTypeName(typeof(double).AssemblyQualifiedName, "f8"); + RegisterTypeName(typeof(float).AssemblyQualifiedName, "f4"); + RegisterTypeName(typeof(int).AssemblyQualifiedName, "i4"); + RegisterTypeName(typeof(uint).AssemblyQualifiedName, "ui4"); + RegisterTypeName(typeof(long).AssemblyQualifiedName, "i8"); + RegisterTypeName(typeof(ulong).AssemblyQualifiedName, "ui8"); + RegisterTypeName(typeof(short).AssemblyQualifiedName, "i2"); + RegisterTypeName(typeof(ushort).AssemblyQualifiedName, "ui2"); + RegisterTypeName(typeof(string).AssemblyQualifiedName, "s"); + } + public BinarySerializer(Stream stream) + { + //Utils.ThrowException(stream == null ? new ArgumentNullException("stream") : null); + m_stream = stream; + } + public BinarySerializer() + { + m_stream = new MemoryStream(); + } + public BinarySerializer(string file_name, FileMode file_mode) + { + m_stream = new FileStream(file_name, file_mode); // throws ArgumentException, NotSupportedException, ArgumentNullException, SecurityException, FileNotFoundException, IOException, DirectoryNotFoundException, PathTooLongException, ArgumentOutOfRangeException + } + // *** Reading *** + private byte[] Read() // Read() is directly or indirectly called from several methods thus exceptions thrown here can also be thrown in all those methods + { + int sz = Marshal.SizeOf(typeof(T)); + byte[] buffer = new byte[sz]; + int num_bytes = m_stream.Read(buffer, 0, sz); // throws IOException, NotSupportedException, ObjectDisposedException + //Utils.ThrowException(num_bytes < sz ? 
new EndOfStreamException() : null); + return buffer; + } + public bool ReadBool() + { + return ReadByte() != 0; + } + public byte ReadByte() // ReadByte() is directly or indirectly called from several methods thus exceptions thrown here can also be thrown in all those methods + { + int val = m_stream.ReadByte(); // throws NotSupportedException, ObjectDisposedException + //Utils.ThrowException(val < 0 ? new EndOfStreamException() : null); + return (byte)val; + } + public sbyte ReadSByte() + { + return (sbyte)ReadByte(); + } + private char ReadChar8() + { + return (char)ReadByte(); + } + private char ReadChar16() + { + return BitConverter.ToChar(Read(), 0); + } + public char ReadChar() + { + return ReadChar16(); + } + public double ReadDouble() + { + return BitConverter.ToDouble(Read(), 0); + } + public float ReadFloat() + { + return BitConverter.ToSingle(Read(), 0); + } + public int ReadInt() + { + return BitConverter.ToInt32(Read(), 0); + } + public uint ReadUInt() + { + return BitConverter.ToUInt32(Read(), 0); + } + public long ReadLong() + { + return BitConverter.ToInt64(Read(), 0); + } + public ulong ReadULong() + { + return BitConverter.ToUInt64(Read(), 0); + } + public short ReadShort() + { + return BitConverter.ToInt16(Read(), 0); + } + public ushort ReadUShort() + { + return BitConverter.ToUInt16(Read(), 0); + } + private string ReadString8() + { + int len = ReadInt(); + if (len < 0) { return null; } + byte[] buffer = new byte[len]; + m_stream.Read(buffer, 0, len); // throws IOException, NotSupportedException, ObjectDisposedException + return Encoding.ASCII.GetString(buffer); + } + private string ReadString16() + { + int len = ReadInt(); + if (len < 0) { return null; } + byte[] buffer = new byte[len * 2]; + m_stream.Read(buffer, 0, len * 2); // throws IOException, NotSupportedException, ObjectDisposedException + return Encoding.Unicode.GetString(buffer); + } + public string ReadString() + { + return ReadString16(); // throws exceptions (see ReadString16()) + 
} + public Type ReadType() + { + string type_name = ReadString8(); // throws exceptions (see ReadString8()) + //Utils.ThrowException(type_name == null ? new InvalidDataException() : null); + return Type.GetType(GetFullTypeName(type_name)); // throws TargetInvocationException, ArgumentException, TypeLoadException, FileNotFoundException, FileLoadException, BadImageFormatException + } + public ValueType ReadValue(Type type) + { + //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null); + //Utils.ThrowException(!type.IsValueType ? new InvalidArgumentValueException("type") : null); + if (type == typeof(bool)) + { + return ReadBool(); + } + else if (type == typeof(byte)) + { + return ReadByte(); + } + else if (type == typeof(sbyte)) + { + return ReadSByte(); + } + else if (type == typeof(char)) + { + return ReadChar(); + } + else if (type == typeof(double)) + { + return ReadDouble(); + } + else if (type == typeof(float)) + { + return ReadFloat(); + } + else if (type == typeof(int)) + { + return ReadInt(); + } + else if (type == typeof(uint)) + { + return ReadUInt(); + } + else if (type == typeof(long)) + { + return ReadLong(); + } + else if (type == typeof(ulong)) + { + return ReadULong(); + } + else if (type == typeof(short)) + { + return ReadShort(); + } + else if (type == typeof(ushort)) + { + return ReadUShort(); + } + else if (typeof(Latino.ISerializable).IsAssignableFrom(type)) + { + ConstructorInfo cxtor = type.GetConstructor(new Type[] { typeof(Latino.BinarySerializer) }); + //Utils.ThrowException(cxtor == null ? 
new ArgumentNotSupportedException("type") : null); + return (ValueType)cxtor.Invoke(new object[] { this }); // throws MemberAccessException, MethodAccessException, TargetInvocationException, NotSupportedException, SecurityException + } + else + { + //throw new ArgumentNotSupportedException("type"); + throw new Exception("type"); + } + } + public T ReadValue() + { + return (T)(object)ReadValue(typeof(T)); // throws exceptions (see ReadValue(Type type)) + } + public object ReadObject(Type type) + { + //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null); + switch (ReadByte()) + { + case 0: + return null; + case 1: + break; + case 2: + Type type_0 = ReadType(); // throws exceptions (see ReadType()) + //Utils.ThrowException(type_0 == null ? new TypeLoadException() : null); + //Utils.ThrowException(!type.IsAssignableFrom(type_0) ? new InvalidArgumentValueException("type") : null); + type = type_0; + break; + default: + throw new InvalidDataException(); + } + if (type == typeof(string)) + { + return ReadString(); + } + else if (typeof(Latino.ISerializable).IsAssignableFrom(type)) + { + ConstructorInfo cxtor = type.GetConstructor(new Type[] { typeof(Latino.BinarySerializer) }); + //Utils.ThrowException(cxtor == null ? new ArgumentNotSupportedException("type") : null); + return cxtor.Invoke(new object[] { this }); // throws MemberAccessException, MethodAccessException, TargetInvocationException, NotSupportedException, SecurityException + } + else if (type.IsValueType) + { + return ReadValue(type); // throws exceptions (see ReadValue(Type type)) + } + else + { + //throw new InvalidArgumentValueException("type"); + throw new Exception("type"); + } + } + public T ReadObject() + { + return (T)ReadObject(typeof(T)); // throws exceptions (see ReadObject(Type type)) + } + public object ReadValueOrObject(Type type) + { + //Utils.ThrowException(type == null ? 
new ArgumentNullException("type") : null); + if (type.IsValueType) + { + return ReadValue(type); // throws exceptions (see ReadValue(Type type)) + } + else + { + return ReadObject(type); // throws exceptions (see ReadObject(Type type)) + } + } + public T ReadValueOrObject() + { + return (T)ReadValueOrObject(typeof(T)); // throws exceptions (see ReadValueOrObject(Type type)) + } + // *** Writing *** + private void Write(byte[] data) // Write(byte[] data) is directly or indirectly called from several methods thus exceptions thrown here can also be thrown in all those methods + { + m_stream.Write(data, 0, data.Length); // throws IOException, NotSupportedException, ObjectDisposedException + } + public void WriteBool(bool val) + { + WriteByte(val ? (byte)1 : (byte)0); + } + public void WriteByte(byte val) // WriteByte(byte val) is directly or indirectly called from several methods thus exceptions thrown here can also be thrown in all those methods + { + m_stream.WriteByte(val); // throws IOException, NotSupportedException, ObjectDisposedException + } + public void WriteSByte(sbyte val) + { + WriteByte((byte)val); + } + private void WriteChar8(char val) + { + WriteByte(Encoding.ASCII.GetBytes(new char[] { val })[0]); + } + private void WriteChar16(char val) + { + Write(BitConverter.GetBytes((ushort)val)); + } + public void WriteChar(char val) + { + WriteChar16(val); + } + public void WriteDouble(double val) + { + Write(BitConverter.GetBytes(val)); + } + public void WriteFloat(float val) + { + Write(BitConverter.GetBytes(val)); + } + public void WriteInt(int val) + { + Write(BitConverter.GetBytes(val)); + } + public void WriteUInt(uint val) + { + Write(BitConverter.GetBytes(val)); + } + public void WriteLong(long val) + { + Write(BitConverter.GetBytes(val)); + } + public void WriteULong(ulong val) + { + Write(BitConverter.GetBytes(val)); + } + public void WriteShort(short val) + { + Write(BitConverter.GetBytes(val)); + } + public void WriteUShort(ushort val) + { + 
Write(BitConverter.GetBytes(val)); + } + private void WriteString8(string val) + { + if (val == null) { WriteInt(-1); return; } + WriteInt(val.Length); + Write(Encoding.ASCII.GetBytes(val)); + } + private void WriteString16(string val) + { + if (val == null) { WriteInt(-1); return; } + WriteInt(val.Length); + Write(Encoding.Unicode.GetBytes(val)); + } + public void WriteString(string val) + { + WriteString16(val); + } + public void WriteValue(ValueType val) + { + if (val is bool) + { + WriteBool((bool)val); + } + else if (val is byte) + { + WriteByte((byte)val); + } + else if (val is sbyte) + { + WriteSByte((sbyte)val); + } + else if (val is char) + { + WriteChar((char)val); + } + else if (val is double) + { + WriteDouble((double)val); + } + else if (val is float) + { + WriteFloat((float)val); + } + else if (val is int) + { + WriteInt((int)val); + } + else if (val is uint) + { + WriteUInt((uint)val); + } + else if (val is long) + { + WriteLong((long)val); + } + else if (val is ulong) + { + WriteULong((ulong)val); + } + else if (val is short) + { + WriteShort((short)val); + } + else if (val is ushort) + { + WriteUShort((ushort)val); + } + else if (val is Latino.ISerializable) + { + ((Latino.ISerializable)val).Save(this); // throws serialization-related exceptions + } + else + { + //throw new ArgumentTypeException("val"); + } + } + public void WriteObject(Type type, object obj) + { + //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null); + //Utils.ThrowException((obj != null && !type.IsAssignableFrom(obj.GetType())) ? 
new ArgumentTypeException("obj") : null); + if (obj == null) + { + WriteByte(0); + } + else + { + Type obj_type = obj.GetType(); + if (obj_type == type) + { + WriteByte(1); + } + else + { + WriteByte(2); + WriteType(obj_type); + } + if (obj is string) + { + WriteString((string)obj); + } + else if (obj is Latino.ISerializable) + { + ((Latino.ISerializable)obj).Save(this); // throws serialization-related exceptions + } + else if (obj is ValueType) + { + WriteValue((ValueType)obj); // throws exceptions (see WriteValue(ValueType val)) + } + else + { + //throw new ArgumentTypeException("obj"); + } + } + } + public void WriteObject(T obj) + { + WriteObject(typeof(T), obj); // throws exceptions (see WriteObject(Type type, object obj)) + } + public void WriteValueOrObject(Type type, object obj) + { + //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null); + //Utils.ThrowException(!type.IsAssignableFrom(obj.GetType()) ? new ArgumentTypeException("obj") : null); + if (type.IsValueType) + { + WriteValue((ValueType)obj); // throws exceptions (see WriteValue(ValueType val)) + } + else + { + WriteObject(type, obj); // throws exceptions (see WriteObject(Type type, object obj)) + } + } + public void WriteValueOrObject(T obj) + { + WriteValueOrObject(typeof(T), obj); // throws exceptions (see WriteValueOrObject(Type type, object obj)) + } + public void WriteType(Type type) + { + //Utils.ThrowException(type == null ? new ArgumentNullException("type") : null); + WriteString8(GetShortTypeName(type.AssemblyQualifiedName)); + } + // *** Data directory *** + public string DataDir + { + get { return m_data_dir; } + set + { + //Utils.ThrowException(!Utils.VerifyPathName(value, /*must_exist=*/true) ? 
new InvalidArgumentValueException("DataDir") : null); + m_data_dir = value; + } + } + // *** Access to the associated stream *** + public void Close() + { + m_stream.Close(); + } + public void Flush() + { + m_stream.Flush(); // throws IOException + } + public Stream Stream + { + get { return m_stream; } + } + } +} + +#endif diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/LemmaSharp.csproj b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/LemmaSharp.csproj new file mode 100644 index 0000000..1b0de67 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/LemmaSharp.csproj @@ -0,0 +1,175 @@ + + + + Debug + AnyCPU + 9.0.21022 + 2.0 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4} + Library + Properties + LemmaSharp + LemmaSharp + v4.7.2 + 512 + true + + + + + + + 3.5 + + http://localhost/LemmaSharp/ + true + Web + true + Foreground + 7 + Days + false + false + true + 0 + 1.0.0.%2a + false + true + + + + + true + bin\x64\Debug\ + DEBUG;TRACE + full + x64 + true + GlobalSuppressions.cs + prompt + false + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + true + GlobalSuppressions.cs + prompt + false + + + bin\x64\WPFDevelop\ + + + true + bin\Debug\ + DEBUG;TRACE + full + AnyCPU + prompt + MinimumRecommendedRules.ruleset + + + bin\Release\ + TRACE + true + pdbonly + AnyCPU + prompt + MinimumRecommendedRules.ruleset + + + bin\WPFDevelop\ + AnyCPU + + + true + bin\x86\Debug\ + DEBUG;TRACE + full + x86 + prompt + MinimumRecommendedRules.ruleset + + + bin\x86\Release\ + TRACE + true + pdbonly + x86 + prompt + MinimumRecommendedRules.ruleset + + + bin\x86\WPFDevelop\ + x86 + MinimumRecommendedRules.ruleset + + + + False + ExternalLibs\Lzma#.dll + + + + + + Code + + + + + + + + + + + + + + + + False + .NET Framework Client Profile + false + + + False + .NET Framework 2.0 %28x86%29 + true + + + False + .NET Framework 3.0 %28x86%29 + false + + + False + .NET Framework 3.5 + false + 
+ + False + .NET Framework 3.5 SP1 + false + + + + + + + + + + + \ No newline at end of file diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/app.config b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/app.config new file mode 100644 index 0000000..312bb3f --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharp/app.config @@ -0,0 +1,3 @@ + + + diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Classes/LanguagePrebuilt.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Classes/LanguagePrebuilt.cs new file mode 100644 index 0000000..056d89c --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Classes/LanguagePrebuilt.cs @@ -0,0 +1,28 @@ +namespace LemmaSharp +{ + public enum LanguagePrebuilt + { + //from Multext-East v4 lexicons + Bulgarian, + Czech, + English, + Estonian, + Persian, + French, + Hungarian, + Macedonian, + Polish, + Romanian, + Russian, + Slovak, + Slovene, + Serbian, + Ukrainian, + //from Multext lexicons + EnglishMT, + FrenchMT, + German, + Italian, + Spanish, + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Classes/LemmatizerPrebuilt.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Classes/LemmatizerPrebuilt.cs new file mode 100644 index 0000000..0ade388 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Classes/LemmatizerPrebuilt.cs @@ -0,0 +1,117 @@ +using System; +using System.IO; +using System.Reflection; +using System.Runtime.Serialization; + +namespace LemmaSharp +{ + [Serializable] + public abstract class LemmatizerPrebuilt : Lemmatizer + { + #region Private Variables + private static string[] asLangMapping = new string[] { + "bg", "mlteast", + "cs", "mlteast", + "en", 
"mlteast", + "et", "mlteast", + "fa", "mlteast", + "fr", "mlteast", + "hu", "mlteast", + "mk", "mlteast", + "pl", "mlteast", + "ro", "mlteast", + "ru", "mlteast", + "sk", "mlteast", + "sl", "mlteast", + "sr", "mlteast", + "uk", "mlteast", + "en", "multext", + "fr", "multext", + "ge", "multext", + "it", "multext", + "sp", "multext", + }; + private LanguagePrebuilt lang; + #endregion + + #region Constructor(s) + public LemmatizerPrebuilt(LanguagePrebuilt lang) + : base() + { + this.lang = lang; + } + + public LemmatizerPrebuilt(LanguagePrebuilt lang, LemmatizerSettings lsett) + : base(lsett) + { + this.lang = lang; + } + #endregion + + #region Private Properties Helping Functions + protected string GetResourceFileName(string sFileMask) + { + return GetResourceFileName(sFileMask, lang); + } + + public static string GetResourceFileName(string sFileMask, LanguagePrebuilt lang) + { + var langFileName = string.Format("{0}-{1}", asLangMapping[(int)lang * 2 + 1], asLangMapping[(int)lang * 2]); + return string.Format(sFileMask, langFileName); + } + #endregion + + #region Public Properties + public LanguagePrebuilt Language + { + get + { + return lang; + } + } + public LexiconPrebuilt Lexicon + { + get + { + return GetLexicon(lang); + } + } + #endregion + + #region Public Properties + public static LexiconPrebuilt GetLexicon(LanguagePrebuilt lang) + { + return (LexiconPrebuilt)Enum.Parse(typeof(LexiconPrebuilt), asLangMapping[((int)lang) * 2 + 1], true); + } + #endregion + + #region Resource Management Functions + protected abstract Assembly GetExecutingAssembly(); + + protected Stream GetResourceStream(string sResourceShortName) + { + var assembly = GetExecutingAssembly(); + string sResourceName = null; + foreach (string sResource in assembly.GetManifestResourceNames()) + { + if (sResource.EndsWith(sResourceShortName)) + { + sResourceName = sResource; + break; + } + } + if (string.IsNullOrEmpty(sResourceName)) + return null; + + return 
assembly.GetManifestResourceStream(sResourceName); + } + #endregion + + #region Serialization Functions + public LemmatizerPrebuilt(SerializationInfo info, StreamingContext context) + : base(info, context) + { + } + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Classes/LemmatizerPrebuiltFull.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Classes/LemmatizerPrebuiltFull.cs new file mode 100644 index 0000000..c5284cb --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Classes/LemmatizerPrebuiltFull.cs @@ -0,0 +1,30 @@ +using System; +using System.Reflection; + +namespace LemmaSharp +{ + [Serializable] + public class LemmatizerPrebuiltFull : LemmatizerPrebuilt + { + public const string FILEMASK = "full7z-{0}.lem"; + + #region Constructor(s) + public LemmatizerPrebuiltFull(LanguagePrebuilt lang) + : base(lang) + { + using (var stream = GetResourceStream(GetResourceFileName(FILEMASK))) + { + this.Deserialize(stream); + stream.Close(); + } + } + #endregion + + #region Resource Management Functions + protected override Assembly GetExecutingAssembly() + { + return Assembly.GetExecutingAssembly(); + } + #endregion + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Classes/LexiconPrebuilt.cs b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Classes/LexiconPrebuilt.cs new file mode 100644 index 0000000..1a432fc --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Classes/LexiconPrebuilt.cs @@ -0,0 +1,8 @@ +namespace LemmaSharp +{ + public enum LexiconPrebuilt + { + MltEast, + Multext + } +} diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-bg.lem 
b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-bg.lem new file mode 100644 index 0000000..a6fddbc Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-bg.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-cs.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-cs.lem new file mode 100644 index 0000000..29736f9 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-cs.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-en.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-en.lem new file mode 100644 index 0000000..52ba798 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-en.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-et.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-et.lem new file mode 100644 index 0000000..d35e6e5 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-et.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-fa.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-fa.lem new file mode 100644 index 0000000..696a4d3 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-fa.lem differ diff --git 
a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-fr.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-fr.lem new file mode 100644 index 0000000..1483877 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-fr.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-hu.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-hu.lem new file mode 100644 index 0000000..ceca7b1 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-hu.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-mk.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-mk.lem new file mode 100644 index 0000000..5fde9d6 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-mk.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-pl.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-pl.lem new file mode 100644 index 0000000..0a5af36 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-pl.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-ro.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-ro.lem new file mode 100644 index 0000000..d337f42 Binary files /dev/null and 
b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-ro.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-ru.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-ru.lem new file mode 100644 index 0000000..2140050 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-ru.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-sk.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-sk.lem new file mode 100644 index 0000000..c942f08 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-sk.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-sl.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-sl.lem new file mode 100644 index 0000000..4d96c9a Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-sl.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-sr.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-sr.lem new file mode 100644 index 0000000..c570074 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-sr.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-uk.lem 
b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-uk.lem new file mode 100644 index 0000000..13a1f03 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-mlteast-uk.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-multext-en.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-multext-en.lem new file mode 100644 index 0000000..a17c72c Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-multext-en.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-multext-fr.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-multext-fr.lem new file mode 100644 index 0000000..5ace281 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-multext-fr.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-multext-ge.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-multext-ge.lem new file mode 100644 index 0000000..ac4e1e3 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-multext-ge.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-multext-it.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-multext-it.lem new file mode 100644 index 0000000..168105b Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-multext-it.lem differ diff --git 
a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-multext-sp.lem b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-multext-sp.lem new file mode 100644 index 0000000..29eb6a9 Binary files /dev/null and b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/Data/full7z-multext-sp.lem differ diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/LemmaSharpPrebuilt.csproj b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/LemmaSharpPrebuilt.csproj new file mode 100644 index 0000000..822da36 --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/LemmaSharpPrebuilt.csproj @@ -0,0 +1,140 @@ + + + + Debug + AnyCPU + 9.0.21022 + 2.0 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564} + Library + Properties + LemmaSharp + LemmaSharpPrebuilt + v4.7.2 + 512 + + + + + 3.5 + + publish\ + true + Disk + false + Foreground + 7 + Days + false + false + true + 0 + 1.0.0.%2a + false + false + true + + + true + bin\x64\Debug\ + DEBUG;TRACE + full + x64 + true + GlobalSuppressions.cs + prompt + false + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + true + GlobalSuppressions.cs + prompt + false + + + bin\x64\WPFDevelop\ + + + true + bin\Debug\ + DEBUG;TRACE + full + AnyCPU + prompt + MinimumRecommendedRules.ruleset + + + bin\Release\ + TRACE + true + pdbonly + AnyCPU + prompt + MinimumRecommendedRules.ruleset + + + bin\WPFDevelop\ + AnyCPU + + + true + bin\x86\Debug\ + DEBUG;TRACE + full + x86 + prompt + MinimumRecommendedRules.ruleset + + + bin\x86\Release\ + TRACE + true + pdbonly + x86 + prompt + MinimumRecommendedRules.ruleset + + + bin\x86\WPFDevelop\ + x86 + MinimumRecommendedRules.ruleset + + + + + + + + + {A39293C1-92D8-47B9-93A4-41F443B4F9E4} + LemmaSharp + + + + + + + + + False + .NET Framework 3.5 SP1 + true + + + + + + + + \ No newline at end 
of file diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/LemmaSharpPrebuiltFull.csproj b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/LemmaSharpPrebuiltFull.csproj new file mode 100644 index 0000000..7a0f58a --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/LemmaSharpPrebuiltFull.csproj @@ -0,0 +1,137 @@ + + + + Debug + AnyCPU + 9.0.21022 + 2.0 + {D926B493-78B6-4FAB-A354-53869F664B5B} + Library + Properties + LemmaSharpPrebuiltFull + LemmaSharpPrebuiltFull + v4.7.2 + 512 + + + + + 3.5 + + publish\ + true + Disk + false + Foreground + 7 + Days + false + false + true + 0 + 1.0.0.%2a + false + false + true + + + + + x64 + bin\x64\Debug\ + + + x64 + bin\x64\Release\ + + + bin\x64\WPFDevelop\ + + + true + bin\Debug\ + AnyCPU + MinimumRecommendedRules.ruleset + + + bin\Release\ + AnyCPU + MinimumRecommendedRules.ruleset + + + bin\WPFDevelop\ + AnyCPU + + + true + bin\x86\Debug\ + x86 + MinimumRecommendedRules.ruleset + + + bin\x86\Release\ + x86 + MinimumRecommendedRules.ruleset + + + bin\x86\WPFDevelop\ + x86 + MinimumRecommendedRules.ruleset + + + + + + + + + + {A39293C1-92D8-47B9-93A4-41F443B4F9E4} + LemmaSharp + + + {1e700d21-62d3-4525-93fe-c1fb0a1b0564} + LemmaSharpPrebuilt + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + False + .NET Framework 3.5 SP1 + true + + + + + \ No newline at end of file diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/app.config b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/app.config new file mode 100644 index 0000000..312bb3f --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuilt/app.config @@ -0,0 +1,3 @@ + + + diff --git a/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuiltFull.sln 
b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuiltFull.sln new file mode 100644 index 0000000..ad553be --- /dev/null +++ b/WebSemanticService/Vendors/LemmaGen/LemmaGen_v3.0_PrebuiltFull/LemmaSharpPrebuiltFull.sln @@ -0,0 +1,58 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.25420.1 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharp", "LemmaSharp\LemmaSharp.csproj", "{A39293C1-92D8-47B9-93A4-41F443B4F9E4}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharpPrebuiltFull", "LemmaSharpPrebuilt\LemmaSharpPrebuiltFull.csproj", "{D926B493-78B6-4FAB-A354-53869F664B5B}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharpPrebuilt", "LemmaSharpPrebuilt\LemmaSharpPrebuilt.csproj", "{1E700D21-62D3-4525-93FE-C1FB0A1B0564}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x64.ActiveCfg = Debug|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x64.Build.0 = Debug|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x86.ActiveCfg = Debug|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x86.Build.0 = Debug|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|Any CPU.Build.0 = Release|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x64.ActiveCfg = Release|x64 + 
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x64.Build.0 = Release|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x86.ActiveCfg = Release|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x86.Build.0 = Release|x86 + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|x64.ActiveCfg = Debug|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|x86.ActiveCfg = Debug|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Release|Any CPU.Build.0 = Release|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Release|x64.ActiveCfg = Release|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Release|x86.ActiveCfg = Release|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x64.ActiveCfg = Debug|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x64.Build.0 = Debug|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x86.ActiveCfg = Debug|x86 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x86.Build.0 = Debug|x86 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|Any CPU.Build.0 = Release|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x64.ActiveCfg = Release|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x64.Build.0 = Release|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x86.ActiveCfg = Release|x86 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x86.Build.0 = Release|x86 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/WebSemanticService/WebSemanticService.sln 
b/WebSemanticService/WebSemanticService.sln new file mode 100644 index 0000000..5beb859 --- /dev/null +++ b/WebSemanticService/WebSemanticService.sln @@ -0,0 +1,153 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.329 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharp", "Vendors\LemmaGen\LemmaGen_v3.0_PrebuiltFull\LemmaSharp\LemmaSharp.csproj", "{A39293C1-92D8-47B9-93A4-41F443B4F9E4}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharpPrebuilt", "Vendors\LemmaGen\LemmaGen_v3.0_PrebuiltFull\LemmaSharpPrebuilt\LemmaSharpPrebuilt.csproj", "{1E700D21-62D3-4525-93FE-C1FB0A1B0564}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharpPrebuiltFull", "Vendors\LemmaGen\LemmaGen_v3.0_PrebuiltFull\LemmaSharpPrebuilt\LemmaSharpPrebuiltFull.csproj", "{D926B493-78B6-4FAB-A354-53869F664B5B}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Infrastructure", "Infrastructure", "{E803B141-93AE-42BD-A068-4C95DD99C923}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Semantic.API", "semantic\Semantic.API\Semantic.API.csproj", "{E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Semantic.API.Proxy", "semantic\Semantic.API.Proxy\Semantic.API.Proxy.csproj", "{3413DF58-8BA9-4276-9C6D-6F67B527C9AF}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Test.Semantic.API.Proxy", "semantic\Semantic.API\Test.Semantic.API.Proxy\Test.Semantic.API.Proxy.csproj", "{1FF0A134-65CB-4059-B93E-F7E34BBB53FE}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + WPFDevelop|Any CPU = WPFDevelop|Any CPU + WPFDevelop|x64 = 
WPFDevelop|x64 + WPFDevelop|x86 = WPFDevelop|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x64.ActiveCfg = Debug|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x64.Build.0 = Debug|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x86.ActiveCfg = Debug|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x86.Build.0 = Debug|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|Any CPU.Build.0 = Release|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x64.ActiveCfg = Release|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x64.Build.0 = Release|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x86.ActiveCfg = Release|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x86.Build.0 = Release|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.WPFDevelop|Any CPU.ActiveCfg = WPFDevelop|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.WPFDevelop|Any CPU.Build.0 = WPFDevelop|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.WPFDevelop|x64.ActiveCfg = WPFDevelop|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.WPFDevelop|x64.Build.0 = WPFDevelop|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.WPFDevelop|x86.ActiveCfg = WPFDevelop|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.WPFDevelop|x86.Build.0 = WPFDevelop|x86 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x64.ActiveCfg = Debug|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x64.Build.0 = Debug|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x86.ActiveCfg = Debug|x86 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x86.Build.0 = 
Debug|x86 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|Any CPU.Build.0 = Release|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x64.ActiveCfg = Release|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x64.Build.0 = Release|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x86.ActiveCfg = Release|x86 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x86.Build.0 = Release|x86 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.WPFDevelop|Any CPU.ActiveCfg = WPFDevelop|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.WPFDevelop|Any CPU.Build.0 = WPFDevelop|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.WPFDevelop|x64.ActiveCfg = WPFDevelop|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.WPFDevelop|x64.Build.0 = WPFDevelop|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.WPFDevelop|x86.ActiveCfg = WPFDevelop|x86 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.WPFDevelop|x86.Build.0 = WPFDevelop|x86 + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|x64.ActiveCfg = Debug|x64 + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|x64.Build.0 = Debug|x64 + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|x86.ActiveCfg = Debug|x86 + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|x86.Build.0 = Debug|x86 + {D926B493-78B6-4FAB-A354-53869F664B5B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Release|Any CPU.Build.0 = Release|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Release|x64.ActiveCfg = Release|x64 + {D926B493-78B6-4FAB-A354-53869F664B5B}.Release|x64.Build.0 = Release|x64 + {D926B493-78B6-4FAB-A354-53869F664B5B}.Release|x86.ActiveCfg = Release|x86 + {D926B493-78B6-4FAB-A354-53869F664B5B}.Release|x86.Build.0 = Release|x86 + {D926B493-78B6-4FAB-A354-53869F664B5B}.WPFDevelop|Any CPU.ActiveCfg = WPFDevelop|Any 
CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.WPFDevelop|Any CPU.Build.0 = WPFDevelop|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.WPFDevelop|x64.ActiveCfg = WPFDevelop|x64 + {D926B493-78B6-4FAB-A354-53869F664B5B}.WPFDevelop|x64.Build.0 = WPFDevelop|x64 + {D926B493-78B6-4FAB-A354-53869F664B5B}.WPFDevelop|x86.ActiveCfg = WPFDevelop|x86 + {D926B493-78B6-4FAB-A354-53869F664B5B}.WPFDevelop|x86.Build.0 = WPFDevelop|x86 + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Debug|x64.ActiveCfg = Debug|x64 + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Debug|x64.Build.0 = Debug|x64 + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Debug|x86.ActiveCfg = Debug|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Debug|x86.Build.0 = Debug|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Release|Any CPU.Build.0 = Release|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Release|x64.ActiveCfg = Release|x64 + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Release|x64.Build.0 = Release|x64 + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Release|x86.ActiveCfg = Release|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Release|x86.Build.0 = Release|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.WPFDevelop|Any CPU.ActiveCfg = WPFDevelop|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.WPFDevelop|Any CPU.Build.0 = WPFDevelop|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.WPFDevelop|x64.ActiveCfg = WPFDevelop|x64 + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.WPFDevelop|x64.Build.0 = WPFDevelop|x64 + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.WPFDevelop|x86.ActiveCfg = WPFDevelop|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.WPFDevelop|x86.Build.0 = WPFDevelop|Any CPU + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + 
{3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.Debug|Any CPU.Build.0 = Debug|Any CPU + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.Debug|x64.ActiveCfg = Debug|x64 + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.Debug|x64.Build.0 = Debug|x64 + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.Debug|x86.ActiveCfg = Debug|Any CPU + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.Debug|x86.Build.0 = Debug|Any CPU + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.Release|Any CPU.ActiveCfg = Release|Any CPU + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.Release|Any CPU.Build.0 = Release|Any CPU + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.Release|x64.ActiveCfg = Release|x64 + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.Release|x64.Build.0 = Release|x64 + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.Release|x86.ActiveCfg = Release|Any CPU + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.Release|x86.Build.0 = Release|Any CPU + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.WPFDevelop|Any CPU.ActiveCfg = WPFDevelop|Any CPU + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.WPFDevelop|Any CPU.Build.0 = WPFDevelop|Any CPU + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.WPFDevelop|x64.ActiveCfg = WPFDevelop|x64 + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.WPFDevelop|x64.Build.0 = WPFDevelop|x64 + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.WPFDevelop|x86.ActiveCfg = WPFDevelop|Any CPU + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF}.WPFDevelop|x86.Build.0 = WPFDevelop|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Debug|x64.ActiveCfg = Debug|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Debug|x64.Build.0 = Debug|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Debug|x86.ActiveCfg = Debug|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Debug|x86.Build.0 = Debug|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Release|Any CPU.Build.0 = Release|Any 
CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Release|x64.ActiveCfg = Release|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Release|x64.Build.0 = Release|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Release|x86.ActiveCfg = Release|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Release|x86.Build.0 = Release|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.WPFDevelop|Any CPU.ActiveCfg = Release|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.WPFDevelop|Any CPU.Build.0 = Release|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.WPFDevelop|x64.ActiveCfg = Release|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.WPFDevelop|x64.Build.0 = Release|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.WPFDevelop|x86.ActiveCfg = Release|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.WPFDevelop|x86.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {A39293C1-92D8-47B9-93A4-41F443B4F9E4} = {E803B141-93AE-42BD-A068-4C95DD99C923} + {1E700D21-62D3-4525-93FE-C1FB0A1B0564} = {E803B141-93AE-42BD-A068-4C95DD99C923} + {D926B493-78B6-4FAB-A354-53869F664B5B} = {E803B141-93AE-42BD-A068-4C95DD99C923} + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {F2BB4EB7-EA7A-4A8D-9532-6C3BCC64095D} + EndGlobalSection +EndGlobal diff --git a/WebSemanticService/semantic/Semantic.API.Proxy/BaseProxy.cs b/WebSemanticService/semantic/Semantic.API.Proxy/BaseProxy.cs new file mode 100644 index 0000000..1989ea3 --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API.Proxy/BaseProxy.cs @@ -0,0 +1,202 @@ +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.Collections.Specialized; +using System.IO; +using System.Linq; +using System.Net; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using ZeroLevel; + +namespace Semantic.API.Proxy +{ + /// + /// Base async rest 
client + /// + public abstract class BaseProxy + { + #region Fields + private readonly string _baseUri; + #endregion + + #region Ctors + static BaseProxy() + { + ServicePointManager.ServerCertificateValidationCallback += (sender, certificate, chain, sslPolicyErrors) => true; + ServicePointManager.Expect100Continue = false; + ServicePointManager.DefaultConnectionLimit = 8; + } + + public BaseProxy(string baseUri) + { + if (String.IsNullOrWhiteSpace(baseUri)) throw new ArgumentNullException("baseUri"); + _baseUri = baseUri; + } + #endregion + + #region Requests + protected T Get(string resource, NameValueCollection parameters = null) + { + return GET(_baseUri, resource, parameters); + } + + protected T Post(string resource, object body, NameValueCollection parameters = null) + { + return POST(_baseUri, resource, body, parameters); + } + + protected T Put(string resource, object body, NameValueCollection parameters = null) + { + return PUT(_baseUri, resource, body, parameters); + } + + protected T Delete(string resource, NameValueCollection parameters = null) + { + return DELETE(_baseUri, resource, parameters); + } + + protected T Delete(string resource, object body, NameValueCollection parameters = null) + { + return DELETE(_baseUri, resource, body, parameters); + } + #endregion + + #region Helpers + private Uri BuildRequestUrl(string baseUri, string resource, NameValueCollection parameters) + { + if (null == resource) throw new ArgumentNullException("resource"); + var stringBuilder = new StringBuilder(baseUri); + if (baseUri[baseUri.Length - 1] != '/') + stringBuilder.Append('/'); + stringBuilder.Append(resource); + if (parameters != null && parameters.Count > 0) + { + stringBuilder.Append("?"); + foreach (string key in parameters.Keys) + { + var val = parameters[key]; + if (string.IsNullOrWhiteSpace(val)) + { + stringBuilder.Append(key); + } + else + { + stringBuilder.AppendFormat("{0}={1}", key, val); + } + stringBuilder.Append("&"); + } + } + return new 
Uri(stringBuilder.ToString().TrimEnd('&')); + } + + + #region Requests + private T SendRequest(string baseUri, string resource, string method, object body, NameValueCollection parameters = null) + { + string statusCode = null; + string reason = null; + try + { + var request = (HttpWebRequest)WebRequest.Create(BuildRequestUrl(baseUri, resource, parameters)); + request.ContinueTimeout = 30000; + request.ReadWriteTimeout = 30000; + request.Timeout = Timeout.Infinite; + request.MaximumResponseHeadersLength = int.MaxValue; + request.Method = method; + request.Proxy = null; + request.UserAgent = "DocStream"; + request.AutomaticDecompression = DecompressionMethods.GZip; + if (body != null) + { + request.Accept = "application/json"; + request.ContentType = "application/json"; + using (var streamWriter = new StreamWriter(request.GetRequestStream())) + { + var json = JsonConvert.SerializeObject(body); + streamWriter.Write(json); + streamWriter.Flush(); + } + } + var response = (HttpWebResponse)(request.GetResponse()); + using (response) + { + statusCode = response.StatusCode.ToString(); + reason = response.StatusDescription; + if (response.StatusCode == HttpStatusCode.OK) + { + using (var stream = new StreamReader(response.GetResponseStream())) + { + var json = stream.ReadToEnd(); + return JsonConvert.DeserializeObject(json); + } + } + else + { + Log.Warning($"[BaseAsyncProxy]\t{method}\t'{baseUri}/{resource}'. Status code: {statusCode ?? "Uncknown"}. Reason: {reason ?? 
string.Empty}"); + } + } + } + catch (WebException ex) + { + try + { + if ((ex.Status == WebExceptionStatus.ProtocolError) && (ex.Response != null)) + { + Log.Warning("[BaseAsyncProxy] WebAPI protocol error, try restore content"); + HttpWebResponse response = ex.Response as HttpWebResponse; + statusCode = response.StatusCode.ToString(); + { + using (var stream = new StreamReader(response.GetResponseStream())) + { + var json = stream.ReadToEnd(); + return JsonConvert.DeserializeObject(json); + } + } + } + else + { + Log.Error(ex, $"[BaseAsyncProxy]\t{method}\t'{baseUri}/{resource}'. Status code: {statusCode ?? "Uncknown"}. Reason: {reason ?? ex.Message}"); + } + } + catch (Exception ex1) + { + Log.Error(ex1, $"[BaseAsyncProxy]\t{method}\t'{baseUri}/{resource}'. Status code: {statusCode ?? "Uncknown"}. Reason: {reason ?? ex.Message}"); + } + } + catch (Exception ex) + { + Log.Error(ex, $"[BaseAsyncProxy]\t{method}\t'{baseUri}/{resource}'. Status code: {statusCode ?? "Uncknown"}. Reason: {reason ?? 
ex.Message}"); + } + return default(T); + } + + private T GET(string baseUri, string resource, NameValueCollection parameters = null) + { + return SendRequest(baseUri, resource, "GET", null, parameters); + } + + private T POST(string baseUri, string resource, object body, NameValueCollection parameters = null) + { + return SendRequest(baseUri, resource, "POST", body, parameters); + } + + private T PUT(string baseUri, string resource, object body, NameValueCollection parameters = null) + { + return SendRequest(baseUri, resource, "PUT", body, parameters); + } + + private T DELETE(string baseUri, string resource, NameValueCollection parameters = null) + { + return SendRequest(baseUri, resource, "DELETE", null, parameters); + } + + private T DELETE(string baseUri, string resource, object body, NameValueCollection parameters = null) + { + return SendRequest(baseUri, resource, "DELETE", body, parameters); + } + #endregion + #endregion + } +} diff --git a/WebSemanticService/semantic/Semantic.API.Proxy/Properties/AssemblyInfo.cs b/WebSemanticService/semantic/Semantic.API.Proxy/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..3540ce2 --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API.Proxy/Properties/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("Semantic.API.Proxy")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("Semantic.API.Proxy")] +[assembly: AssemblyCopyright("Copyright © 2016")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. 
If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("75305c4a-d132-4ceb-8381-583a50135415")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/WebSemanticService/semantic/Semantic.API.Proxy/Semantic.API.Proxy.csproj b/WebSemanticService/semantic/Semantic.API.Proxy/Semantic.API.Proxy.csproj new file mode 100644 index 0000000..23948d3 --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API.Proxy/Semantic.API.Proxy.csproj @@ -0,0 +1,72 @@ + + + + + Debug + AnyCPU + {3413DF58-8BA9-4276-9C6D-6F67B527C9AF} + Library + Properties + Semantic.API.Proxy + Semantic.API.Proxy + v4.7.2 + 512 + + + + + + x64 + bin\x64\Debug\ + + + x64 + bin\x64\Release\ + + + bin\x64\WPFDevelop\ + + + true + bin\Debug\ + AnyCPU + MinimumRecommendedRules.ruleset + + + bin\Release\ + AnyCPU + MinimumRecommendedRules.ruleset + + + bin\WPFDevelop\ + AnyCPU + + + + ..\..\packages\Newtonsoft.Json.12.0.2\lib\net45\Newtonsoft.Json.dll + + + + + ..\..\packages\ZeroLevel.2.0.8\lib\netstandard2.0\ZeroLevel.dll + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/WebSemanticService/semantic/Semantic.API.Proxy/SemanticApiProxy.cs b/WebSemanticService/semantic/Semantic.API.Proxy/SemanticApiProxy.cs new file mode 100644 index 0000000..09da94a --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API.Proxy/SemanticApiProxy.cs @@ -0,0 +1,188 @@ +using System.Collections.Generic; +using ZeroLevel.Services.Semantic; + +namespace 
Semantic.API.Proxy +{ + /// + /// Предоставляет доступ к Prime Semantic API + /// + public sealed class SemanticApiProxy + : BaseProxy + { + public SemanticApiProxy(string baseUri) + : base(baseUri) + { + } + + #region Split to words + /// + /// Разделение текста на слова + /// + /// Список слов + public IEnumerable ExtractWords(string text) + { + return Post>("/api/text/words", text); + } + /// + /// Разделение текста на слова, без повторов + /// + /// Список слов + public IEnumerable ExtractUniqueWords(string text) + { + return Post>("/api/text/words/unique", text); + } + /// + /// Разделение текста на слова без стоп-слов и повторов + /// + /// Список слов + public IEnumerable ExtractUniqueWordsWithoutStopWords(string text) + { + return Post>("/api/text/words/clean", text); + } + #endregion + + #region Stemming + /// + /// Разделение текста на стемы (основы слов) + /// + /// Список стемов + public IEnumerable ExtractStems(string text) + { + return Post>("/api/stem", text); + } + /// + /// Разделение текста на стемы (основы слов) без повторов + /// + /// Список стемов + public IEnumerable ExtractUniqueStems(string text) + { + return Post>("/api/stem/unique", text); + } + /// + /// Разделение текста на токены, на основе стемов + /// + /// Список токенов (оригинальное слово, стем, позиция в тексте) + public IEnumerable ExtractUniqueStemsWithoutStopWords(string text) + { + return Post>("/api/stem/clean", text); + } + #endregion + + #region Lemmatization + /// + /// Разделение текста на леммы (начальные формы слов) + /// + /// Список лемм + public IEnumerable ExtractLemmas(string text) + { + return Post>("/api/lemma", text); + } + /// + /// Разделение текста на леммы (начальные формы слов) без повторов + /// + /// Список лемм + public IEnumerable ExtractUniqueLemmas(string text) + { + return Post>("/api/lemma/unique", text); + } + /// + /// Разделение текста на леммы (начальные формы слов) без повторов и стоп-слов + /// + /// Список лемм + public IEnumerable 
ExtractUniqueLemmasWithoutStopWords(string text) + { + return Post>("/api/lemma/clean", text); + } + #endregion + + #region Words occurences + /// + /// Поиск вхождений слов в текст + /// + /// Текст + /// Массив слов для поиска + /// Список токенов (слово, позиция) + public IDictionary> SearchWordsInTextDirectly(string text, string[] words) + { + return Post>>("/api/text/occurences/words", new WordsSearchRequest + { + Text = text, + Words = words + }); + } + /// + /// Поиск вхождений слов в текст, на основе стемминга + /// + /// Текст + /// Массив слов для поиска + /// Список токенов (слово, стем, позиция) + public IDictionary> SearchWordsInTextByStemming(string text, string[] words) + { + return Post>>("/api/stem/occurences/words", new WordsSearchRequest + { + Text = text, + Words = words + }); + } + /// + /// Поиск вхождений слов в текст, на основе лемматизации + /// + /// Текст + /// Массив слов для поиска + /// Список токенов (слово, лемма, позиция) + public IDictionary> SearchWordsInTextByLemmas(string text, string[] words) + { + return Post>>("/api/lemma/occurences/words", new WordsSearchRequest + { + Text = text, + Words = words + }); + } + #endregion + + #region Phrase occurences + /// + /// Поиск вхождений фраз в текст + /// + /// Текст + /// Массив фраз для поиска + /// Список фраз в тексте соответствующих поисковому запросу + public IDictionary> SearchPhrasesInTextDirectly(string text, string[] phrases) + { + return Post>>("/api/text/occurences/phrases", new WordsSearchRequest + { + Text = text, + Words = phrases + }); + } + /// + /// Поиск вхождений фраз в текст, на основе стемминга + /// + /// Текст + /// Массив фраз для поиска + /// Список фраз в тексте соответствующих поисковому запросу + public IDictionary> SearchPhrasesInTextByStemming(string text, string[] phrases) + { + return Post>>("/api/stem/occurences/phrases", new WordsSearchRequest + { + Text = text, + Words = phrases + }); + } + /// + /// Поиск вхождений фраз в текст, на основе 
лемматизации + /// + /// Текст + /// Массив фраз для поиска + /// Список фраз в тексте соответствующих поисковому запросу + public IDictionary> SearchPhrasesInTextByLemmas(string text, string[] phrases) + { + return Post>>("api/lemma/occurences/phrases", new WordsSearchRequest + { + Text = text, + Words = phrases + }); + } + #endregion + } +} diff --git a/WebSemanticService/semantic/Semantic.API.Proxy/WordsSearchRequest.cs b/WebSemanticService/semantic/Semantic.API.Proxy/WordsSearchRequest.cs new file mode 100644 index 0000000..a6be967 --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API.Proxy/WordsSearchRequest.cs @@ -0,0 +1,8 @@ +namespace Semantic.API.Proxy +{ + public class WordsSearchRequest + { + public string Text; + public string[] Words; + } +} diff --git a/WebSemanticService/semantic/Semantic.API.Proxy/app.config b/WebSemanticService/semantic/Semantic.API.Proxy/app.config new file mode 100644 index 0000000..eee72dd --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API.Proxy/app.config @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/WebSemanticService/semantic/Semantic.API.Proxy/packages.config b/WebSemanticService/semantic/Semantic.API.Proxy/packages.config new file mode 100644 index 0000000..6f6fd07 --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API.Proxy/packages.config @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/WebSemanticService/semantic/Semantic.API/HostService.cs b/WebSemanticService/semantic/Semantic.API/HostService.cs new file mode 100644 index 0000000..251d550 --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/HostService.cs @@ -0,0 +1,22 @@ +using ZeroLevel.Services.Applications; + +namespace Semantic.API +{ + public class HostService : + BaseZeroService + { + public HostService() : base("Semantic api service") + { + } + + protected override void StartAction() + { + // Запуск web API + Startup.Run(false, false); + } + + protected override void StopAction() + { + } + } +} diff --git 
a/WebSemanticService/semantic/Semantic.API/Model/PhrasesLemmaRequest.cs b/WebSemanticService/semantic/Semantic.API/Model/PhrasesLemmaRequest.cs new file mode 100644 index 0000000..d224a7c --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Model/PhrasesLemmaRequest.cs @@ -0,0 +1,8 @@ +namespace Semantic.API.Model +{ + public class PhrasesLemmaRequest + { + public string Text; + public string[] Phrases; + } +} diff --git a/WebSemanticService/semantic/Semantic.API/Model/WordsSearchRequest.cs b/WebSemanticService/semantic/Semantic.API/Model/WordsSearchRequest.cs new file mode 100644 index 0000000..92d7cf4 --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Model/WordsSearchRequest.cs @@ -0,0 +1,8 @@ +namespace Semantic.API.Model +{ + public class WordsSearchRequest + { + public string Text; + public string[] Words; + } +} diff --git a/WebSemanticService/semantic/Semantic.API/Program.cs b/WebSemanticService/semantic/Semantic.API/Program.cs new file mode 100644 index 0000000..a5c801a --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Program.cs @@ -0,0 +1,12 @@ +using ZeroLevel; + +namespace Semantic.API +{ + static class Program + { + static void Main(params string[] args) + { + Bootstrap.Startup(args, () => { Log.Backlog(100); return true; }); + } + } +} diff --git a/WebSemanticService/semantic/Semantic.API/Properties/AssemblyInfo.cs b/WebSemanticService/semantic/Semantic.API/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..26fcb2e --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Properties/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. 
+[assembly: AssemblyTitle("Semantic.API")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("Semantic.API")] +[assembly: AssemblyCopyright("Copyright © 2016")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("975a8cbc-3c1e-48f3-be6d-525e8ed578ca")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/WebSemanticService/semantic/Semantic.API/Semantic.API.csproj b/WebSemanticService/semantic/Semantic.API/Semantic.API.csproj new file mode 100644 index 0000000..57b3a8e --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Semantic.API.csproj @@ -0,0 +1,183 @@ + + + + + Debug + AnyCPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A} + Exe + Properties + Semantic.API + Semantic.API + v4.7.2 + 512 + + + + x64 + bin\x64\Debug\ + + + x64 + bin\x64\Release\ + + + bin\x64\WPFDevelop\ + + + true + bin\Debug\ + AnyCPU + MinimumRecommendedRules.ruleset + true + + + bin\Release\ + AnyCPU + MinimumRecommendedRules.ruleset + true + + + bin\WPFDevelop\ + AnyCPU + + + + + + + + ..\..\packages\Microsoft.Owin.4.0.1\lib\net45\Microsoft.Owin.dll + + + ..\..\packages\Microsoft.Owin.FileSystems.4.0.1\lib\net45\Microsoft.Owin.FileSystems.dll + + + 
packages\Microsoft.Owin.Host.HttpListener.4.0.1\lib\net45\Microsoft.Owin.Host.HttpListener.dll + + + packages\Microsoft.Owin.Hosting.4.0.1\lib\net45\Microsoft.Owin.Hosting.dll + + + ..\..\packages\Microsoft.Owin.StaticFiles.4.0.1\lib\net45\Microsoft.Owin.StaticFiles.dll + + + packages\Newtonsoft.Json.12.0.2\lib\net45\Newtonsoft.Json.dll + + + ..\..\packages\Owin.1.0\lib\net40\Owin.dll + + + + + + + ..\..\packages\Microsoft.AspNet.WebApi.Client.5.2.7\lib\net45\System.Net.Http.Formatting.dll + + + + + ..\..\packages\Microsoft.AspNet.WebApi.Core.5.2.7\lib\net45\System.Web.Http.dll + + + ..\..\packages\Microsoft.AspNet.WebApi.Owin.5.2.7\lib\net45\System.Web.Http.Owin.dll + + + packages\ZeroLevel.2.0.8\lib\netstandard2.0\ZeroLevel.dll + + + + + + + + + + + + + + + + + + + + + + + + + + Always + + + Always + + + Always + + + Always + + + Always + + + Always + + + Always + + + Always + + + Always + + + Always + + + Always + + + Always + + + + + Always + + + Always + + + + + Always + + + + + {1e700d21-62d3-4525-93fe-c1fb0a1b0564} + LemmaSharpPrebuilt + + + {d926b493-78b6-4fab-a354-53869f664b5b} + LemmaSharpPrebuiltFull + + + {a39293c1-92d8-47b9-93a4-41f443b4f9e4} + LemmaSharp + + + + + \ No newline at end of file diff --git a/WebSemanticService/semantic/Semantic.API/Semantic.API.sln b/WebSemanticService/semantic/Semantic.API/Semantic.API.sln new file mode 100644 index 0000000..efd4e69 --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Semantic.API.sln @@ -0,0 +1,107 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28307.421 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Semantic.API", "Semantic.API.csproj", "{E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharp", "..\..\Vendors\LemmaGen\LemmaGen_v3.0_PrebuiltFull\LemmaSharp\LemmaSharp.csproj", "{A39293C1-92D8-47B9-93A4-41F443B4F9E4}" 
+EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharpPrebuiltFull", "..\..\Vendors\LemmaGen\LemmaGen_v3.0_PrebuiltFull\LemmaSharpPrebuilt\LemmaSharpPrebuiltFull.csproj", "{D926B493-78B6-4FAB-A354-53869F664B5B}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharpPrebuilt", "..\..\Vendors\LemmaGen\LemmaGen_v3.0_PrebuiltFull\LemmaSharpPrebuilt\LemmaSharpPrebuilt.csproj", "{1E700D21-62D3-4525-93FE-C1FB0A1B0564}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Test.Semantic.API.Proxy", "Test.Semantic.API.Proxy\Test.Semantic.API.Proxy.csproj", "{1FF0A134-65CB-4059-B93E-F7E34BBB53FE}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Semantic.API.Proxy", "..\..\libraries\Semantic.API.Proxy\Semantic.API.Proxy.csproj", "{75305C4A-D132-4CEB-8381-583A50135415}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Debug|x64.ActiveCfg = Debug|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Debug|x64.Build.0 = Debug|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Debug|x86.ActiveCfg = Debug|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Debug|x86.Build.0 = Debug|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Release|Any CPU.Build.0 = Release|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Release|x64.ActiveCfg = Release|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Release|x64.Build.0 = Release|Any CPU + 
{E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Release|x86.ActiveCfg = Release|Any CPU + {E17951D5-85F2-4FDA-B9FD-1116EA6CCB0A}.Release|x86.Build.0 = Release|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x64.ActiveCfg = Debug|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x64.Build.0 = Debug|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x86.ActiveCfg = Debug|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x86.Build.0 = Debug|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|Any CPU.Build.0 = Release|Any CPU + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x64.ActiveCfg = Release|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x64.Build.0 = Release|x64 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x86.ActiveCfg = Release|x86 + {A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x86.Build.0 = Release|x86 + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|x64.ActiveCfg = Debug|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|x64.Build.0 = Debug|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|x86.ActiveCfg = Debug|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Debug|x86.Build.0 = Debug|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Release|Any CPU.Build.0 = Release|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Release|x64.ActiveCfg = Release|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Release|x64.Build.0 = Release|Any CPU + {D926B493-78B6-4FAB-A354-53869F664B5B}.Release|x86.ActiveCfg = Release|Any CPU + 
{D926B493-78B6-4FAB-A354-53869F664B5B}.Release|x86.Build.0 = Release|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x64.ActiveCfg = Debug|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x64.Build.0 = Debug|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x86.ActiveCfg = Debug|x86 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x86.Build.0 = Debug|x86 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|Any CPU.Build.0 = Release|Any CPU + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x64.ActiveCfg = Release|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x64.Build.0 = Release|x64 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x86.ActiveCfg = Release|x86 + {1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x86.Build.0 = Release|x86 + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Debug|x64.ActiveCfg = Debug|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Debug|x64.Build.0 = Debug|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Debug|x86.ActiveCfg = Debug|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Debug|x86.Build.0 = Debug|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Release|Any CPU.Build.0 = Release|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Release|x64.ActiveCfg = Release|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Release|x64.Build.0 = Release|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Release|x86.ActiveCfg = Release|Any CPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE}.Release|x86.Build.0 = Release|Any CPU + {75305C4A-D132-4CEB-8381-583A50135415}.Debug|Any 
CPU.ActiveCfg = Debug|Any CPU + {75305C4A-D132-4CEB-8381-583A50135415}.Debug|Any CPU.Build.0 = Debug|Any CPU + {75305C4A-D132-4CEB-8381-583A50135415}.Debug|x64.ActiveCfg = Debug|Any CPU + {75305C4A-D132-4CEB-8381-583A50135415}.Debug|x64.Build.0 = Debug|Any CPU + {75305C4A-D132-4CEB-8381-583A50135415}.Debug|x86.ActiveCfg = Debug|Any CPU + {75305C4A-D132-4CEB-8381-583A50135415}.Debug|x86.Build.0 = Debug|Any CPU + {75305C4A-D132-4CEB-8381-583A50135415}.Release|Any CPU.ActiveCfg = Release|Any CPU + {75305C4A-D132-4CEB-8381-583A50135415}.Release|Any CPU.Build.0 = Release|Any CPU + {75305C4A-D132-4CEB-8381-583A50135415}.Release|x64.ActiveCfg = Release|Any CPU + {75305C4A-D132-4CEB-8381-583A50135415}.Release|x64.Build.0 = Release|Any CPU + {75305C4A-D132-4CEB-8381-583A50135415}.Release|x86.ActiveCfg = Release|Any CPU + {75305C4A-D132-4CEB-8381-583A50135415}.Release|x86.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {AE2538E5-3D6F-4CFE-8A87-1B1CDD09EC66} + EndGlobalSection +EndGlobal diff --git a/WebSemanticService/semantic/Semantic.API/Startup.cs b/WebSemanticService/semantic/Semantic.API/Startup.cs new file mode 100644 index 0000000..e61a5f8 --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Startup.cs @@ -0,0 +1,109 @@ +using Microsoft.Owin.FileSystems; +using Microsoft.Owin.Hosting; +using Microsoft.Owin.StaticFiles; +using Owin; +using System.Collections.Generic; +using System.IO; +using System.Net; +using System.Net.Http; +using System.Threading; +using System.Threading.Tasks; +using System.Web.Http; +using System.Web.Http.Controllers; +using System.Web.Http.Routing; +using ZeroLevel; + +namespace Semantic.API +{ + public class LogRequestAndResponseHandler : DelegatingHandler + { + protected override async Task SendAsync( + HttpRequestMessage request, CancellationToken cancellationToken) + { + // 
log request body + string requestBody = await request.Content.ReadAsStringAsync(); + Log.Debug(requestBody); + // let other handlers process the request + var result = await base.SendAsync(request, cancellationToken); + if (result.Content != null) + { + //(result.Content as ObjectContent).Formatter.MediaTypeMappings.Clear(); + // once response body is ready, log it + var responseBody = await result.Content.ReadAsStringAsync(); + Log.Debug(responseBody); + } + return result; + } + } + + public class EnableInheritRoutingDirectRouteProvider : DefaultDirectRouteProvider + { + protected override IReadOnlyList GetActionRouteFactories(HttpActionDescriptor actionDescriptor) + { + // inherit route attributes decorated on base class controller's actions + return actionDescriptor.GetCustomAttributes(inherit: true); + } + } + + public class Startup + { + // This code configures Web API. The Startup class is specified as a type + // parameter in the WebApp.Start method. + public void Configuration(IAppBuilder appBuilder) + { + // Configure Web API for self-host. 
+ HttpConfiguration config = new HttpConfiguration(); + + config.MapHttpAttributeRoutes(new EnableInheritRoutingDirectRouteProvider()); + + config.Routes.MapHttpRoute( + name: "DefaultApi", + routeTemplate: "api/{controller}/{action}/{id}", + defaults: new { id = RouteParameter.Optional } + ); + + config.EnsureInitialized(); + if (_log_request_response) + { + config.MessageHandlers.Add(new LogRequestAndResponseHandler()); + } + if (ZeroLevel.Configuration.Default.FirstOrDefault("ntlmEnabled", false)) + { + // Enable NTLM authentication + ((HttpListener)appBuilder.Properties["System.Net.HttpListener"]).AuthenticationSchemes = + AuthenticationSchemes.IntegratedWindowsAuthentication; + } + appBuilder.UseWebApi(config); + if (_enable_static_files) + { + var webdir = Path.Combine(ZeroLevel.Configuration.BaseDirectory, "web"); + if (false == Directory.Exists(webdir)) + { + Directory.CreateDirectory(webdir); + } + PhysicalFileSystem fileSystem = new PhysicalFileSystem(webdir); + FileServerOptions options = new FileServerOptions + { + EnableDefaultFiles = true, + FileSystem = fileSystem + }; + options.StaticFileOptions.ServeUnknownFileTypes = true; + appBuilder.UseFileServer(options); + } + } + + private static bool _log_request_response; + private static bool _enable_static_files; + public static void Run( + bool log_request_response, + bool enable_static_files = false) + { + _log_request_response = log_request_response; + _enable_static_files = enable_static_files; + string baseAddress = string.Format("http://*:{0}/", + ZeroLevel.Configuration.Default.First("webApiPort")); + WebApp.Start(url: baseAddress); + Log.Info(string.Format("Web service url: {0}", baseAddress)); + } + } +} diff --git a/WebSemanticService/semantic/Semantic.API/Test.Semantic.API.Proxy/Program.cs b/WebSemanticService/semantic/Semantic.API/Test.Semantic.API.Proxy/Program.cs new file mode 100644 index 0000000..6796179 --- /dev/null +++ 
b/WebSemanticService/semantic/Semantic.API/Test.Semantic.API.Proxy/Program.cs @@ -0,0 +1,157 @@ +using Semantic.API.Proxy; +using System; +using System.Collections.Generic; +using System.Linq; +using ZeroLevel.Services.Semantic; + +namespace Test.Semantic.API.Proxy +{ + class Program + { + static void Main(string[] args) + { + var proxy = new SemanticApiProxy("http://localhost:8020"); + var text = "Мы вполне привыкли к трём пространственным измерениям нашей Вселенной, к длине, ширине и глубине. Мы можем представить, как выглядят вещи в меньших измерениях – на двумерной плоскости или на одномерной линии – но с высшими измерениями всё не так просто, поскольку мы не можем представить себе движение в направлении, не описываемом нашим привычным пространством. Во Вселенной есть и четвёртое измерение (время), и только три пространственных. Но среди вопросов на этой неделе я увидел выдающийся вопрос из серии «что, если» от писателя Келли Люк: Что для людей означало бы, если бы количество измерений в нашем мире менялось бы как времена года? Например, половина года у нас три измерения, а половина – четыре. Представьте, по возможности, что у вас есть способность двигаться в одном дополнительном направлении – не входящем в обычный набор вверх-вниз, север - юг и запад-восток.Представьте, что такая способность есть только у вас"; + var direct_words = new string[] { "представить", "пространством", "измерения" }; + var words = new string[] { "представлять", "пространство", "измерение", "писатель" }; + + var direct_phrases = new string[] { "Мы вполне привыкли", "описываемом нашим привычным", "среди вопросов" }; + var phrases = new string[] { "высшие измерения", "наш мир", "двумерная плоскость", "все не так просто" }; + // 1. Split + TestSplitTextIntoWords(proxy, text); + Console.ReadKey(); + Console.Clear(); + // 2. Stemming + TestSplitTextIntoStems(proxy, text); + Console.ReadKey(); + Console.Clear(); + // 3. 
Lemmatization + TestSplitTextIntoLemmas(proxy, text); + Console.ReadKey(); + Console.Clear(); + // 4. Search words + TestSearchWordsInText(proxy, text, direct_words, words); + Console.ReadKey(); + Console.Clear(); + // 5. Search phrases + TestSearchPhrasesInText(proxy, text, direct_phrases, phrases); + Console.ReadKey(); + Console.Clear(); + + } + + private static void TestSplitTextIntoWords(SemanticApiProxy proxy, string text) + { + Console.WriteLine("Разбиение на слова"); + Console.WriteLine(text); + Console.WriteLine("Words:"); + ShowLines(proxy.ExtractWords(text)); + Console.WriteLine("Unique words:"); + ShowLines(proxy.ExtractUniqueWords(text)); + Console.WriteLine("Words dictionary:"); + ShowLines(proxy.ExtractUniqueWordsWithoutStopWords(text)); + Console.WriteLine("Completed. Press key to continue..."); + } + + private static void TestSplitTextIntoStems(SemanticApiProxy proxy, string text) + { + Console.WriteLine("Разбиение на стемы"); + Console.WriteLine(text); + Console.WriteLine("Stems:"); + ShowLines(proxy.ExtractStems(text)); + Console.WriteLine("Stem tokens:"); + ShowLines(proxy.ExtractUniqueStems(text)); + Console.WriteLine("Stems dictionary:"); + ShowLines(proxy.ExtractUniqueStemsWithoutStopWords(text)); + Console.WriteLine("Completed. Press key to continue..."); + } + + private static void TestSplitTextIntoLemmas(SemanticApiProxy proxy, string text) + { + Console.WriteLine("Разбиение на леммы"); + Console.WriteLine(text); + Console.WriteLine("Lemmas:"); + ShowLines(proxy.ExtractLemmas(text)); + Console.WriteLine("Unique lemmas:"); + ShowLines(proxy.ExtractUniqueLemmas(text)); + Console.WriteLine("Lemmas dictionary:"); + ShowLines(proxy.ExtractUniqueLemmasWithoutStopWords(text)); + Console.WriteLine("Completed. 
Press key to continue..."); + } + + private static void TestSearchWordsInText(SemanticApiProxy proxy, string text, string[] direct_words, string[] words) + { + Console.WriteLine("Поиск слов в текст"); + Console.WriteLine(text); + Console.WriteLine(string.Join("; ", words)); + Console.WriteLine(string.Join("; ", direct_words)); + Console.WriteLine("GET"); + Console.WriteLine("Прямой поиск слов:"); + ShowLines(proxy.SearchWordsInTextDirectly(text, direct_words)); + Console.WriteLine("Поиск слов по стемам:"); + ShowLines(proxy.SearchWordsInTextByStemming(text, words)); + Console.WriteLine("Поиск слов по леммам:"); + ShowLines(proxy.SearchWordsInTextByLemmas(text, words)); + Console.WriteLine("Completed. Press key to continue..."); + } + + private static void TestSearchPhrasesInText(SemanticApiProxy proxy, string text, string[] direct_phrases, string[] phrases) + { + Console.WriteLine("Поиск фраз в тексте"); + Console.WriteLine(text); + Console.WriteLine("Прямой поиск фраз:"); + ShowLines(proxy.SearchPhrasesInTextDirectly(text, direct_phrases)); + Console.WriteLine("Поиск фраз по стемам:"); + ShowLines(proxy.SearchPhrasesInTextByStemming(text, phrases)); + Console.WriteLine("Поиск фраз по леммам:"); + ShowLines(proxy.SearchPhrasesInTextByLemmas(text, phrases)); + Console.WriteLine("Completed. 
Press key to continue..."); + } + + private static void ShowLines(IDictionary> lexems) + { + foreach (var pair in lexems) + { + Console.Write(pair.Key); + foreach (var l in pair.Value) + { + Console.Write($"\t[{string.Join("; ", l.Select(e => e.Token))}]"); + } + } + Console.WriteLine(); + } + + private static void ShowLines(IEnumerable lines) + { + var columns_count = 4; + var list = lines.ToList(); + var dif = list.Count - list.Count % columns_count; + for (int i = 0; i < dif; i += columns_count) + { + for (var j = 0; j < columns_count; j++) + { + Console.Write("\t{0}[{1}]", list[i + j].Token, list[i + j].Word); + } + Console.WriteLine(); + } + for (var j = list.Count - dif; j > 0; j--) + { + Console.Write("\t{0}[{1}]", list[list.Count - j].Token, list[list.Count - j].Word); + } + Console.WriteLine(); + } + + private static void ShowLines(IDictionary> lexems) + { + foreach (var pair in lexems) + { + Console.Write(pair.Key); + foreach (var l in pair.Value) + { + Console.Write($"\t{l.Token} ({l.Position})"); + } + } + Console.WriteLine(); + } + } +} diff --git a/WebSemanticService/semantic/Semantic.API/Test.Semantic.API.Proxy/Properties/AssemblyInfo.cs b/WebSemanticService/semantic/Semantic.API/Test.Semantic.API.Proxy/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..94b7251 --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Test.Semantic.API.Proxy/Properties/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. 
+[assembly: AssemblyTitle("Test.Semantic.API.Proxy")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("Test.Semantic.API.Proxy")] +[assembly: AssemblyCopyright("Copyright © 2016")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("1ff0a134-65cb-4059-b93e-f7e34bbb53fe")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/WebSemanticService/semantic/Semantic.API/Test.Semantic.API.Proxy/Test.Semantic.API.Proxy.csproj b/WebSemanticService/semantic/Semantic.API/Test.Semantic.API.Proxy/Test.Semantic.API.Proxy.csproj new file mode 100644 index 0000000..552484d --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Test.Semantic.API.Proxy/Test.Semantic.API.Proxy.csproj @@ -0,0 +1,70 @@ + + + + + Debug + AnyCPU + {1FF0A134-65CB-4059-B93E-F7E34BBB53FE} + Exe + Properties + Test.Semantic.API.Proxy + Test.Semantic.API.Proxy + v4.7.2 + 512 + + + + AnyCPU + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + false + + + AnyCPU + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + false + + + + + + + + + + ..\..\..\packages\ZeroLevel.2.0.8\lib\netstandard2.0\ZeroLevel.dll + + + + + + + + + + + + + {3413df58-8ba9-4276-9c6d-6f67b527c9af} + Semantic.API.Proxy + 
+ + + + \ No newline at end of file diff --git a/WebSemanticService/semantic/Semantic.API/Test.Semantic.API.Proxy/app.config b/WebSemanticService/semantic/Semantic.API/Test.Semantic.API.Proxy/app.config new file mode 100644 index 0000000..312bb3f --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Test.Semantic.API.Proxy/app.config @@ -0,0 +1,3 @@ + + + diff --git a/WebSemanticService/semantic/Semantic.API/Test.Semantic.API.Proxy/packages.config b/WebSemanticService/semantic/Semantic.API/Test.Semantic.API.Proxy/packages.config new file mode 100644 index 0000000..778ce5c --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Test.Semantic.API.Proxy/packages.config @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/WebSemanticService/semantic/Semantic.API/Web/CSS/glitch.css b/WebSemanticService/semantic/Semantic.API/Web/CSS/glitch.css new file mode 100644 index 0000000..72ec343 --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Web/CSS/glitch.css @@ -0,0 +1,163 @@ +.glitch { + color: white; + font-size: 100px; + position: relative; + width: 400px; + margin: 0 auto; +} + +@keyframes noise-anim { + 0% { + clip: rect(78px, 9999px, 23px, 0); + } + 5% { + clip: rect(23px, 9999px, 97px, 0); + } + 10% { + clip: rect(97px, 9999px, 46px, 0); + } + 15% { + clip: rect(87px, 9999px, 92px, 0); + } + 20% { + clip: rect(68px, 9999px, 80px, 0); + } + 25% { + clip: rect(18px, 9999px, 64px, 0); + } + 30% { + clip: rect(64px, 9999px, 58px, 0); + } + 35% { + clip: rect(94px, 9999px, 11px, 0); + } + 40% { + clip: rect(19px, 9999px, 37px, 0); + } + 45% { + clip: rect(68px, 9999px, 34px, 0); + } + 50% { + clip: rect(56px, 9999px, 87px, 0); + } + 55% { + clip: rect(86px, 9999px, 47px, 0); + } + 60% { + clip: rect(32px, 9999px, 40px, 0); + } + 65% { + clip: rect(5px, 9999px, 44px, 0); + } + 70% { + clip: rect(62px, 9999px, 62px, 0); + } + 75% { + clip: rect(69px, 9999px, 83px, 0); + } + 80% { + clip: rect(74px, 9999px, 76px, 0); + } + 85% { + clip: 
rect(6px, 9999px, 23px, 0); + } + 90% { + clip: rect(12px, 9999px, 39px, 0); + } + 95% { + clip: rect(10px, 9999px, 22px, 0); + } + 100% { + clip: rect(100px, 9999px, 61px, 0); + } +} +.glitch:after { + content: attr(data-text); + position: absolute; + left: 2px; + text-shadow: -1px 0 red; + top: 0; + color: white; + background: black; + overflow: hidden; + clip: rect(0, 900px, 0, 0); + animation: noise-anim 2s infinite linear alternate-reverse; +} + +@keyframes noise-anim-2 { + 0% { + clip: rect(30px, 9999px, 29px, 0); + } + 5% { + clip: rect(61px, 9999px, 52px, 0); + } + 10% { + clip: rect(96px, 9999px, 53px, 0); + } + 15% { + clip: rect(91px, 9999px, 6px, 0); + } + 20% { + clip: rect(42px, 9999px, 84px, 0); + } + 25% { + clip: rect(99px, 9999px, 6px, 0); + } + 30% { + clip: rect(78px, 9999px, 83px, 0); + } + 35% { + clip: rect(58px, 9999px, 49px, 0); + } + 40% { + clip: rect(45px, 9999px, 63px, 0); + } + 45% { + clip: rect(43px, 9999px, 48px, 0); + } + 50% { + clip: rect(26px, 9999px, 53px, 0); + } + 55% { + clip: rect(74px, 9999px, 5px, 0); + } + 60% { + clip: rect(13px, 9999px, 49px, 0); + } + 65% { + clip: rect(71px, 9999px, 57px, 0); + } + 70% { + clip: rect(83px, 9999px, 47px, 0); + } + 75% { + clip: rect(92px, 9999px, 88px, 0); + } + 80% { + clip: rect(87px, 9999px, 29px, 0); + } + 85% { + clip: rect(92px, 9999px, 42px, 0); + } + 90% { + clip: rect(95px, 9999px, 7px, 0); + } + 95% { + clip: rect(48px, 9999px, 74px, 0); + } + 100% { + clip: rect(82px, 9999px, 5px, 0); + } +} +.glitch:before { + content: attr(data-text); + position: absolute; + left: -2px; + text-shadow: 1px 0 blue; + top: 0; + color: white; + background: black; + overflow: hidden; + clip: rect(0, 900px, 0, 0); + animation: noise-anim-2 3s infinite linear alternate-reverse; +} diff --git a/WebSemanticService/semantic/Semantic.API/Web/CSS/jquery-ui.min.css b/WebSemanticService/semantic/Semantic.API/Web/CSS/jquery-ui.min.css new file mode 100644 index 0000000..1bd40d2 --- /dev/null +++ 
b/WebSemanticService/semantic/Semantic.API/Web/CSS/jquery-ui.min.css @@ -0,0 +1,1170 @@ +.ui-helper-hidden { + display: none; +} + +.ui-helper-hidden-accessible { + border: 0; + clip: rect(0 0 0 0); + height: 1px; + margin: -1px; + overflow: hidden; + padding: 0; + position: absolute; + width: 1px; +} + +.ui-helper-reset { + margin: 0; + padding: 0; + border: 0; + outline: 0; + line-height: 1.3; + text-decoration: none; + font-size: 100%; + list-style: none; +} + +.ui-helper-clearfix:before, .ui-helper-clearfix:after { + content: ""; + display: table; + border-collapse: collapse; +} + +.ui-helper-clearfix:after { + clear: both; +} + +.ui-helper-zfix { + width: 100%; + height: 100%; + top: 0; + left: 0; + position: absolute; + opacity: 0; + filter: Alpha(Opacity=0); +} + +.ui-front { + z-index: 100; +} + +.ui-state-disabled { + cursor: default !important; + pointer-events: none; +} + +.ui-icon { + display: inline-block; + vertical-align: middle; + margin-top: -.25em; + position: relative; + text-indent: -99999px; + overflow: hidden; + background-repeat: no-repeat; +} + +.ui-widget-icon-block { + left: 50%; + margin-left: -8px; + display: block; +} + +.ui-widget-overlay { + position: fixed; + top: 0; + left: 0; + width: 100%; + height: 100%; +} + +.ui-datepicker { + width: 17em; + padding: .2em .2em 0; + display: none; +} + + .ui-datepicker .ui-datepicker-header { + position: relative; + padding: .2em 0; + } + + .ui-datepicker .ui-datepicker-prev, .ui-datepicker .ui-datepicker-next { + position: absolute; + top: 2px; + width: 1.8em; + height: 1.8em; + } + + .ui-datepicker .ui-datepicker-prev-hover, .ui-datepicker .ui-datepicker-next-hover { + top: 1px; + } + + .ui-datepicker .ui-datepicker-prev { + left: 2px; + } + + .ui-datepicker .ui-datepicker-next { + right: 2px; + } + + .ui-datepicker .ui-datepicker-prev-hover { + left: 1px; + } + + .ui-datepicker .ui-datepicker-next-hover { + right: 1px; + } + + .ui-datepicker .ui-datepicker-prev span, .ui-datepicker 
.ui-datepicker-next span { + display: block; + position: absolute; + left: 50%; + margin-left: -8px; + top: 50%; + margin-top: -8px; + } + + .ui-datepicker .ui-datepicker-title { + margin: 0 2.3em; + line-height: 1.8em; + text-align: center; + } + + .ui-datepicker .ui-datepicker-title select { + font-size: 1em; + margin: 1px 0; + } + + .ui-datepicker select.ui-datepicker-month, .ui-datepicker select.ui-datepicker-year { + width: 45%; + } + + .ui-datepicker table { + width: 100%; + font-size: .9em; + border-collapse: collapse; + margin: 0 0 .4em; + } + + .ui-datepicker th { + padding: .7em .3em; + text-align: center; + font-weight: bold; + border: 0; + } + + .ui-datepicker td { + border: 0; + padding: 1px; + } + + .ui-datepicker td span, .ui-datepicker td a { + display: block; + padding: .2em; + text-align: right; + text-decoration: none; + } + + .ui-datepicker .ui-datepicker-buttonpane { + background-image: none; + margin: .7em 0 0 0; + padding: 0 .2em; + border-left: 0; + border-right: 0; + border-bottom: 0; + } + + .ui-datepicker .ui-datepicker-buttonpane button { + float: right; + margin: .5em .2em .4em; + cursor: pointer; + padding: .2em .6em .3em .6em; + width: auto; + overflow: visible; + } + + .ui-datepicker .ui-datepicker-buttonpane button.ui-datepicker-current { + float: left; + } + + .ui-datepicker.ui-datepicker-multi { + width: auto; + } + +.ui-datepicker-multi .ui-datepicker-group { + float: left; +} + + .ui-datepicker-multi .ui-datepicker-group table { + width: 95%; + margin: 0 auto .4em; + } + +.ui-datepicker-multi-2 .ui-datepicker-group { + width: 50%; +} + +.ui-datepicker-multi-3 .ui-datepicker-group { + width: 33.3%; +} + +.ui-datepicker-multi-4 .ui-datepicker-group { + width: 25%; +} + +.ui-datepicker-multi .ui-datepicker-group-last .ui-datepicker-header, .ui-datepicker-multi .ui-datepicker-group-middle .ui-datepicker-header { + border-left-width: 0; +} + +.ui-datepicker-multi .ui-datepicker-buttonpane { + clear: left; +} + 
+.ui-datepicker-row-break { + clear: both; + width: 100%; + font-size: 0; +} + +.ui-datepicker-rtl { + direction: rtl; +} + + .ui-datepicker-rtl .ui-datepicker-prev { + right: 2px; + left: auto; + } + + .ui-datepicker-rtl .ui-datepicker-next { + left: 2px; + right: auto; + } + + .ui-datepicker-rtl .ui-datepicker-prev:hover { + right: 1px; + left: auto; + } + + .ui-datepicker-rtl .ui-datepicker-next:hover { + left: 1px; + right: auto; + } + + .ui-datepicker-rtl .ui-datepicker-buttonpane { + clear: right; + } + + .ui-datepicker-rtl .ui-datepicker-buttonpane button { + float: left; + } + + .ui-datepicker-rtl .ui-datepicker-buttonpane button.ui-datepicker-current, .ui-datepicker-rtl .ui-datepicker-group { + float: right; + } + + .ui-datepicker-rtl .ui-datepicker-group-last .ui-datepicker-header, .ui-datepicker-rtl .ui-datepicker-group-middle .ui-datepicker-header { + border-right-width: 0; + border-left-width: 1px; + } + +.ui-datepicker .ui-icon { + display: block; + text-indent: -99999px; + overflow: hidden; + background-repeat: no-repeat; + left: .5em; + top: .3em; +} + +.ui-widget { + font-family: Arial,Helvetica,sans-serif; + font-size: 1em; +} + + .ui-widget .ui-widget { + font-size: 1em; + } + + .ui-widget input, .ui-widget select, .ui-widget textarea, .ui-widget button { + font-family: Arial,Helvetica,sans-serif; + font-size: 1em; + } + + .ui-widget.ui-widget-content { + border: 1px solid #c5c5c5; + } + +.ui-widget-content { + border: 1px solid #ddd; + background: #fff; + color: #333; +} + + .ui-widget-content a { + color: #333; + } + +.ui-widget-header { + border: 1px solid #ddd; + background: #e9e9e9; + color: #333; + font-weight: bold; +} + + .ui-widget-header a { + color: #333; + } + + .ui-state-default, .ui-widget-content .ui-state-default, .ui-widget-header .ui-state-default, .ui-button, html .ui-button.ui-state-disabled:hover, html .ui-button.ui-state-disabled:active { + border: 1px solid #c5c5c5; + background: #f6f6f6; + font-weight: normal; + color: 
#454545; + } + + .ui-state-default a, .ui-state-default a:link, .ui-state-default a:visited, a.ui-button, a:link.ui-button, a:visited.ui-button, .ui-button { + color: #454545; + text-decoration: none; + } + + .ui-state-hover, .ui-widget-content .ui-state-hover, .ui-widget-header .ui-state-hover, .ui-state-focus, .ui-widget-content .ui-state-focus, .ui-widget-header .ui-state-focus, .ui-button:hover, .ui-button:focus { + border: 1px solid #ccc; + background: #ededed; + font-weight: normal; + color: #2b2b2b; + } + + .ui-state-hover a, .ui-state-hover a:hover, .ui-state-hover a:link, .ui-state-hover a:visited, .ui-state-focus a, .ui-state-focus a:hover, .ui-state-focus a:link, .ui-state-focus a:visited, a.ui-button:hover, a.ui-button:focus { + color: #2b2b2b; + text-decoration: none; + } + +.ui-visual-focus { + box-shadow: 0 0 3px 1px rgb(94,158,214); +} + +.ui-state-active, .ui-widget-content .ui-state-active, .ui-widget-header .ui-state-active, a.ui-button:active, .ui-button:active, .ui-button.ui-state-active:hover { + border: 1px solid #003eff; + background: #007fff; + font-weight: normal; + color: #fff; +} + + .ui-icon-background, .ui-state-active .ui-icon-background { + border: #003eff; + background-color: #fff; + } + + .ui-state-active a, .ui-state-active a:link, .ui-state-active a:visited { + color: #fff; + text-decoration: none; + } + +.ui-state-highlight, .ui-widget-content .ui-state-highlight, .ui-widget-header .ui-state-highlight { + border: 1px solid #dad55e; + background: #fffa90; + color: #777620; +} + +.ui-state-checked { + border: 1px solid #dad55e; + background: #fffa90; +} + +.ui-state-highlight a, .ui-widget-content .ui-state-highlight a, .ui-widget-header .ui-state-highlight a { + color: #777620; +} + +.ui-state-error, .ui-widget-content .ui-state-error, .ui-widget-header .ui-state-error { + border: 1px solid #f1a899; + background: #fddfdf; + color: #5f3f3f; +} + + .ui-state-error a, .ui-widget-content .ui-state-error a, .ui-widget-header 
.ui-state-error a { + color: #5f3f3f; + } + +.ui-state-error-text, .ui-widget-content .ui-state-error-text, .ui-widget-header .ui-state-error-text { + color: #5f3f3f; +} + +.ui-priority-primary, .ui-widget-content .ui-priority-primary, .ui-widget-header .ui-priority-primary { + font-weight: bold; +} + +.ui-priority-secondary, .ui-widget-content .ui-priority-secondary, .ui-widget-header .ui-priority-secondary { + opacity: .7; + filter: Alpha(Opacity=70); + font-weight: normal; +} + +.ui-state-disabled, .ui-widget-content .ui-state-disabled, .ui-widget-header .ui-state-disabled { + opacity: .35; + filter: Alpha(Opacity=35); + background-image: none; +} + + .ui-state-disabled .ui-icon { + filter: Alpha(Opacity=35); + } + +.ui-icon { + width: 16px; + height: 16px; +} + +.ui-icon, .ui-widget-content .ui-icon { + background-image: url("images/ui-icons_444444_256x240.png"); +} + +.ui-widget-header .ui-icon { + background-image: url("images/ui-icons_444444_256x240.png"); +} + +.ui-state-hover .ui-icon, .ui-state-focus .ui-icon, .ui-button:hover .ui-icon, .ui-button:focus .ui-icon { + background-image: url("images/ui-icons_555555_256x240.png"); +} + +.ui-state-active .ui-icon, .ui-button:active .ui-icon { + background-image: url("images/ui-icons_ffffff_256x240.png"); +} + +.ui-state-highlight .ui-icon, .ui-button .ui-state-highlight.ui-icon { + background-image: url("images/ui-icons_777620_256x240.png"); +} + +.ui-state-error .ui-icon, .ui-state-error-text .ui-icon { + background-image: url("images/ui-icons_cc0000_256x240.png"); +} + +.ui-button .ui-icon { + background-image: url("images/ui-icons_777777_256x240.png"); +} + +.ui-icon-blank { + background-position: 16px 16px; +} + +.ui-icon-caret-1-n { + background-position: 0 0; +} + +.ui-icon-caret-1-ne { + background-position: -16px 0; +} + +.ui-icon-caret-1-e { + background-position: -32px 0; +} + +.ui-icon-caret-1-se { + background-position: -48px 0; +} + +.ui-icon-caret-1-s { + background-position: -65px 0; +} + 
+.ui-icon-caret-1-sw { + background-position: -80px 0; +} + +.ui-icon-caret-1-w { + background-position: -96px 0; +} + +.ui-icon-caret-1-nw { + background-position: -112px 0; +} + +.ui-icon-caret-2-n-s { + background-position: -128px 0; +} + +.ui-icon-caret-2-e-w { + background-position: -144px 0; +} + +.ui-icon-triangle-1-n { + background-position: 0 -16px; +} + +.ui-icon-triangle-1-ne { + background-position: -16px -16px; +} + +.ui-icon-triangle-1-e { + background-position: -32px -16px; +} + +.ui-icon-triangle-1-se { + background-position: -48px -16px; +} + +.ui-icon-triangle-1-s { + background-position: -65px -16px; +} + +.ui-icon-triangle-1-sw { + background-position: -80px -16px; +} + +.ui-icon-triangle-1-w { + background-position: -96px -16px; +} + +.ui-icon-triangle-1-nw { + background-position: -112px -16px; +} + +.ui-icon-triangle-2-n-s { + background-position: -128px -16px; +} + +.ui-icon-triangle-2-e-w { + background-position: -144px -16px; +} + +.ui-icon-arrow-1-n { + background-position: 0 -32px; +} + +.ui-icon-arrow-1-ne { + background-position: -16px -32px; +} + +.ui-icon-arrow-1-e { + background-position: -32px -32px; +} + +.ui-icon-arrow-1-se { + background-position: -48px -32px; +} + +.ui-icon-arrow-1-s { + background-position: -65px -32px; +} + +.ui-icon-arrow-1-sw { + background-position: -80px -32px; +} + +.ui-icon-arrow-1-w { + background-position: -96px -32px; +} + +.ui-icon-arrow-1-nw { + background-position: -112px -32px; +} + +.ui-icon-arrow-2-n-s { + background-position: -128px -32px; +} + +.ui-icon-arrow-2-ne-sw { + background-position: -144px -32px; +} + +.ui-icon-arrow-2-e-w { + background-position: -160px -32px; +} + +.ui-icon-arrow-2-se-nw { + background-position: -176px -32px; +} + +.ui-icon-arrowstop-1-n { + background-position: -192px -32px; +} + +.ui-icon-arrowstop-1-e { + background-position: -208px -32px; +} + +.ui-icon-arrowstop-1-s { + background-position: -224px -32px; +} + +.ui-icon-arrowstop-1-w { + background-position: 
-240px -32px; +} + +.ui-icon-arrowthick-1-n { + background-position: 1px -48px; +} + +.ui-icon-arrowthick-1-ne { + background-position: -16px -48px; +} + +.ui-icon-arrowthick-1-e { + background-position: -32px -48px; +} + +.ui-icon-arrowthick-1-se { + background-position: -48px -48px; +} + +.ui-icon-arrowthick-1-s { + background-position: -64px -48px; +} + +.ui-icon-arrowthick-1-sw { + background-position: -80px -48px; +} + +.ui-icon-arrowthick-1-w { + background-position: -96px -48px; +} + +.ui-icon-arrowthick-1-nw { + background-position: -112px -48px; +} + +.ui-icon-arrowthick-2-n-s { + background-position: -128px -48px; +} + +.ui-icon-arrowthick-2-ne-sw { + background-position: -144px -48px; +} + +.ui-icon-arrowthick-2-e-w { + background-position: -160px -48px; +} + +.ui-icon-arrowthick-2-se-nw { + background-position: -176px -48px; +} + +.ui-icon-arrowthickstop-1-n { + background-position: -192px -48px; +} + +.ui-icon-arrowthickstop-1-e { + background-position: -208px -48px; +} + +.ui-icon-arrowthickstop-1-s { + background-position: -224px -48px; +} + +.ui-icon-arrowthickstop-1-w { + background-position: -240px -48px; +} + +.ui-icon-arrowreturnthick-1-w { + background-position: 0 -64px; +} + +.ui-icon-arrowreturnthick-1-n { + background-position: -16px -64px; +} + +.ui-icon-arrowreturnthick-1-e { + background-position: -32px -64px; +} + +.ui-icon-arrowreturnthick-1-s { + background-position: -48px -64px; +} + +.ui-icon-arrowreturn-1-w { + background-position: -64px -64px; +} + +.ui-icon-arrowreturn-1-n { + background-position: -80px -64px; +} + +.ui-icon-arrowreturn-1-e { + background-position: -96px -64px; +} + +.ui-icon-arrowreturn-1-s { + background-position: -112px -64px; +} + +.ui-icon-arrowrefresh-1-w { + background-position: -128px -64px; +} + +.ui-icon-arrowrefresh-1-n { + background-position: -144px -64px; +} + +.ui-icon-arrowrefresh-1-e { + background-position: -160px -64px; +} + +.ui-icon-arrowrefresh-1-s { + background-position: -176px -64px; +} + 
+.ui-icon-arrow-4 { + background-position: 0 -80px; +} + +.ui-icon-arrow-4-diag { + background-position: -16px -80px; +} + +.ui-icon-extlink { + background-position: -32px -80px; +} + +.ui-icon-newwin { + background-position: -48px -80px; +} + +.ui-icon-refresh { + background-position: -64px -80px; +} + +.ui-icon-shuffle { + background-position: -80px -80px; +} + +.ui-icon-transfer-e-w { + background-position: -96px -80px; +} + +.ui-icon-transferthick-e-w { + background-position: -112px -80px; +} + +.ui-icon-folder-collapsed { + background-position: 0 -96px; +} + +.ui-icon-folder-open { + background-position: -16px -96px; +} + +.ui-icon-document { + background-position: -32px -96px; +} + +.ui-icon-document-b { + background-position: -48px -96px; +} + +.ui-icon-note { + background-position: -64px -96px; +} + +.ui-icon-mail-closed { + background-position: -80px -96px; +} + +.ui-icon-mail-open { + background-position: -96px -96px; +} + +.ui-icon-suitcase { + background-position: -112px -96px; +} + +.ui-icon-comment { + background-position: -128px -96px; +} + +.ui-icon-person { + background-position: -144px -96px; +} + +.ui-icon-print { + background-position: -160px -96px; +} + +.ui-icon-trash { + background-position: -176px -96px; +} + +.ui-icon-locked { + background-position: -192px -96px; +} + +.ui-icon-unlocked { + background-position: -208px -96px; +} + +.ui-icon-bookmark { + background-position: -224px -96px; +} + +.ui-icon-tag { + background-position: -240px -96px; +} + +.ui-icon-home { + background-position: 0 -112px; +} + +.ui-icon-flag { + background-position: -16px -112px; +} + +.ui-icon-calendar { + background-position: -32px -112px; +} + +.ui-icon-cart { + background-position: -48px -112px; +} + +.ui-icon-pencil { + background-position: -64px -112px; +} + +.ui-icon-clock { + background-position: -80px -112px; +} + +.ui-icon-disk { + background-position: -96px -112px; +} + +.ui-icon-calculator { + background-position: -112px -112px; +} + +.ui-icon-zoomin { 
+ background-position: -128px -112px; +} + +.ui-icon-zoomout { + background-position: -144px -112px; +} + +.ui-icon-search { + background-position: -160px -112px; +} + +.ui-icon-wrench { + background-position: -176px -112px; +} + +.ui-icon-gear { + background-position: -192px -112px; +} + +.ui-icon-heart { + background-position: -208px -112px; +} + +.ui-icon-star { + background-position: -224px -112px; +} + +.ui-icon-link { + background-position: -240px -112px; +} + +.ui-icon-cancel { + background-position: 0 -128px; +} + +.ui-icon-plus { + background-position: -16px -128px; +} + +.ui-icon-plusthick { + background-position: -32px -128px; +} + +.ui-icon-minus { + background-position: -48px -128px; +} + +.ui-icon-minusthick { + background-position: -64px -128px; +} + +.ui-icon-close { + background-position: -80px -128px; +} + +.ui-icon-closethick { + background-position: -96px -128px; +} + +.ui-icon-key { + background-position: -112px -128px; +} + +.ui-icon-lightbulb { + background-position: -128px -128px; +} + +.ui-icon-scissors { + background-position: -144px -128px; +} + +.ui-icon-clipboard { + background-position: -160px -128px; +} + +.ui-icon-copy { + background-position: -176px -128px; +} + +.ui-icon-contact { + background-position: -192px -128px; +} + +.ui-icon-image { + background-position: -208px -128px; +} + +.ui-icon-video { + background-position: -224px -128px; +} + +.ui-icon-script { + background-position: -240px -128px; +} + +.ui-icon-alert { + background-position: 0 -144px; +} + +.ui-icon-info { + background-position: -16px -144px; +} + +.ui-icon-notice { + background-position: -32px -144px; +} + +.ui-icon-help { + background-position: -48px -144px; +} + +.ui-icon-check { + background-position: -64px -144px; +} + +.ui-icon-bullet { + background-position: -80px -144px; +} + +.ui-icon-radio-on { + background-position: -96px -144px; +} + +.ui-icon-radio-off { + background-position: -112px -144px; +} + +.ui-icon-pin-w { + background-position: -128px 
-144px; +} + +.ui-icon-pin-s { + background-position: -144px -144px; +} + +.ui-icon-play { + background-position: 0 -160px; +} + +.ui-icon-pause { + background-position: -16px -160px; +} + +.ui-icon-seek-next { + background-position: -32px -160px; +} + +.ui-icon-seek-prev { + background-position: -48px -160px; +} + +.ui-icon-seek-end { + background-position: -64px -160px; +} + +.ui-icon-seek-start { + background-position: -80px -160px; +} + +.ui-icon-seek-first { + background-position: -80px -160px; +} + +.ui-icon-stop { + background-position: -96px -160px; +} + +.ui-icon-eject { + background-position: -112px -160px; +} + +.ui-icon-volume-off { + background-position: -128px -160px; +} + +.ui-icon-volume-on { + background-position: -144px -160px; +} + +.ui-icon-power { + background-position: 0 -176px; +} + +.ui-icon-signal-diag { + background-position: -16px -176px; +} + +.ui-icon-signal { + background-position: -32px -176px; +} + +.ui-icon-battery-0 { + background-position: -48px -176px; +} + +.ui-icon-battery-1 { + background-position: -64px -176px; +} + +.ui-icon-battery-2 { + background-position: -80px -176px; +} + +.ui-icon-battery-3 { + background-position: -96px -176px; +} + +.ui-icon-circle-plus { + background-position: 0 -192px; +} + +.ui-icon-circle-minus { + background-position: -16px -192px; +} + +.ui-icon-circle-close { + background-position: -32px -192px; +} + +.ui-icon-circle-triangle-e { + background-position: -48px -192px; +} + +.ui-icon-circle-triangle-s { + background-position: -64px -192px; +} + +.ui-icon-circle-triangle-w { + background-position: -80px -192px; +} + +.ui-icon-circle-triangle-n { + background-position: -96px -192px; +} + +.ui-icon-circle-arrow-e { + background-position: -112px -192px; +} + +.ui-icon-circle-arrow-s { + background-position: -128px -192px; +} + +.ui-icon-circle-arrow-w { + background-position: -144px -192px; +} + +.ui-icon-circle-arrow-n { + background-position: -160px -192px; +} + +.ui-icon-circle-zoomin { + 
background-position: -176px -192px; +} + +.ui-icon-circle-zoomout { + background-position: -192px -192px; +} + +.ui-icon-circle-check { + background-position: -208px -192px; +} + +.ui-icon-circlesmall-plus { + background-position: 0 -208px; +} + +.ui-icon-circlesmall-minus { + background-position: -16px -208px; +} + +.ui-icon-circlesmall-close { + background-position: -32px -208px; +} + +.ui-icon-squaresmall-plus { + background-position: -48px -208px; +} + +.ui-icon-squaresmall-minus { + background-position: -64px -208px; +} + +.ui-icon-squaresmall-close { + background-position: -80px -208px; +} + +.ui-icon-grip-dotted-vertical { + background-position: 0 -224px; +} + +.ui-icon-grip-dotted-horizontal { + background-position: -16px -224px; +} + +.ui-icon-grip-solid-vertical { + background-position: -32px -224px; +} + +.ui-icon-grip-solid-horizontal { + background-position: -48px -224px; +} + +.ui-icon-gripsmall-diagonal-se { + background-position: -64px -224px; +} + +.ui-icon-grip-diagonal-se { + background-position: -80px -224px; +} + +.ui-corner-all, .ui-corner-top, .ui-corner-left, .ui-corner-tl { + border-top-left-radius: 3px; +} + +.ui-corner-all, .ui-corner-top, .ui-corner-right, .ui-corner-tr { + border-top-right-radius: 3px; +} + +.ui-corner-all, .ui-corner-bottom, .ui-corner-left, .ui-corner-bl { + border-bottom-left-radius: 3px; +} + +.ui-corner-all, .ui-corner-bottom, .ui-corner-right, .ui-corner-br { + border-bottom-right-radius: 3px; +} + +.ui-widget-overlay { + background: #aaa; + opacity: .3; + filter: Alpha(Opacity=30); +} + +.ui-widget-shadow { + -webkit-box-shadow: 0 0 5px #666; + box-shadow: 0 0 5px #666; +} diff --git a/WebSemanticService/semantic/Semantic.API/Web/CSS/local.css b/WebSemanticService/semantic/Semantic.API/Web/CSS/local.css new file mode 100644 index 0000000..a0d0af5 --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Web/CSS/local.css @@ -0,0 +1,164 @@ +html { + box-sizing: border-box; +} + +*, +*::before, +*::after { + 
box-sizing: inherit; +} + +html { + background-color: lightslategrey; + font-size: 14px; + -moz-osx-font-smoothing: grayscale; + -webkit-font-smoothing: antialiased; + min-width: 300px; + overflow-x: hidden; + overflow-y: scroll; + text-rendering: optimizeLegibility; +} + +body { + color: #666; + background: black; + font-family: "Source Sans Pro", "Helvetica", "Arial", sans-serif; + font-size: 1rem; + font-weight: 400; + line-height: 1.4; +} + +a { + text-decoration: none; + transition: all 0.3s cubic-bezier(.25, .8, .25, 1); +} + +div, h2, p { + margin: 0; + padding: 0; +} + +.header { + color: white; + padding: 40px 0 20px; + text-align: center; +} + + .header h1 { + font-size: 40px; + font-weight: bold; + } + + .header h2 a { + border-bottom: 1px solid rgba(255, 255, 255, 0.5); + color: white; + font-size: 20px; + opacity: 0.5; + } + + .header h2 a:hover { + border-bottom-color: white; + opacity: 1; + } + +.main { + margin: 0 auto; + max-width: 1040px; + padding: 10px; +} + +.column { + flex: 1; + flex-direction: column; +} + +.column-item { + background: white; + box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12), 0 1px 2px rgba(0, 0, 0, 0.24); + color: #666; + display: flex; + flex: 1; + flex-direction: column; + flex-basis: auto; + margin: 10px; +} + +.article-body { + display: flex; + flex: 1; + flex-basis: auto; + flex-direction: column; + padding: 20px; +} + +.article-title { + color: #333; + flex-shrink: 0; + font-size: 1.4em; + font-weight: 700; + line-height: 1.2; +} + +.article-content { + display: block; + width: 100%; +} + +.article-info { + display: flex; + font-size: 0.85em; + justify-content: space-between; + margin-top: 10px; +} + +.full-textarea { + width: 100%; + min-height: 200px; + display: block; + border: solid 2px #000; + resize: none; +} + +.left-textarea { + width: 40%; + min-height: 200px; + float: left; + border: solid 1px #000; + resize: none; +} + +.between-textarea { + width: 20%; + height: auto; + float: left; + border: solid 1px #fff; + 
color: Black; + text-align: left; +} + +.right-textarea { + width: 40%; + min-height: 200px; + color: Black; + border: solid 1px #000; + text-align: justify; + resize: none; +} + +.left-half { + width: 40%; + min-height: 200px; + float: left; + border: solid 1px #000; + resize: none; +} + +.right-half { + width: 58%; + min-height: 200px; + color: Black; + border: solid 1px #000; + text-align: justify; + resize: none; + margin-left: 1%; +} diff --git a/WebSemanticService/semantic/Semantic.API/Web/CSS/reset-min.css b/WebSemanticService/semantic/Semantic.API/Web/CSS/reset-min.css new file mode 100644 index 0000000..00aae50 --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Web/CSS/reset-min.css @@ -0,0 +1 @@ +html,body,div,span,applet,object,iframe,h1,h2,h3,h4,h5,h6,p,blockquote,pre,a,abbr,acronym,address,big,cite,code,del,dfn,em,img,ins,kbd,q,s,samp,small,strike,strong,sub,sup,tt,var,b,u,i,center,dl,dt,dd,ol,ul,li,fieldset,form,label,legend,table,caption,tbody,tfoot,thead,tr,th,td,article,aside,canvas,details,embed,figure,figcaption,footer,header,hgroup,menu,nav,output,ruby,section,summary,time,mark,audio,video{margin:0;padding:0;border:0;font-size:100%;font:inherit;vertical-align:baseline}article,aside,details,figcaption,figure,footer,header,hgroup,menu,nav,section{display:block}body{line-height:1}ol,ul{list-style:none}blockquote,q{quotes:none}blockquote:before,blockquote:after,q:before,q:after{content:'';content:none}table{border-collapse:collapse;border-spacing:0} \ No newline at end of file diff --git a/WebSemanticService/semantic/Semantic.API/Web/HTML/index.html b/WebSemanticService/semantic/Semantic.API/Web/HTML/index.html new file mode 100644 index 0000000..ce17605 --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Web/HTML/index.html @@ -0,0 +1,207 @@ + + + + + + Semantic + + + + + + + + + + +
+
Semantic API
+
+ +
+
+
+
+

+ Примитивы работы с тектом +

+
+

+ +

+

+ +

+ +
+
+ +
+
+ +
+ +

+
+
+ [------] + [------] +
+
+
+ +
+
+

+ Использование стемминга +

+
+ +
+ +
+ +
+ +
+ +
+ +
+
+
+ [------] + [------] +
+
+
+ +
+
+

+ Использование лемматизатора +

+
+

+ +

+

+ +

+ +
+
+ +
+
+ +
+ +

+
+
+ [------] + [------] +
+
+
+ +
+
+

+ Поиск слов в тексте +

+
+

+ +

+

+ + + +

+

+

+ + +
+

+
+
+ [------] + [------] +
+
+
+ +
+
+

+ Поиск фраз в тексте +

+
+

+ +

+

+ + + +

+

+

+ + +
+

+
+
+ [------] + [------] +
+
+
+ +
+
+

+ Завтра будет лучше +

+
+
+
+ [------] + [------] +
+
+
+
+
+ + \ No newline at end of file diff --git a/WebSemanticService/semantic/Semantic.API/Web/JS/api.js b/WebSemanticService/semantic/Semantic.API/Web/JS/api.js new file mode 100644 index 0000000..8f2b350 --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Web/JS/api.js @@ -0,0 +1,349 @@ +$(function () { + $("#wordapisample").text("/api/text/words?text=..."); + $("#wordsplittype").change(function () { + switch ($("#wordsplittype").val()) { + case "1": + $("#wordapisample").text("/api/text/words?text=..."); + break + case "2": + $("#wordapisample").text("/api/text/words/unique?text=..."); + break + case "3": + $("#wordapisample").text("/api/text/words/clean?text=..."); + break + } + }); + + $("#stemapisample").text("/api/stem?text=..."); + $("#stemsplittype").change(function () { + switch ($("#stemsplittype").val()) { + case "1": + $("#stemapisample").text("/api/stem?text=..."); + break + case "2": + $("#stemapisample").text("/api/stem/unique?text=..."); + break + case "3": + $("#stemapisample").text("/api/stem/clean?text=..."); + break + } + }); + + $("#lemmapisample").text("/api/lemma?text=..."); + $("#lemmtype").change(function () { + switch ($("#lemmtype").val()) { + case "1": + $("#lemmapisample").text("/api/lemma?text=..."); + break + case "2": + $("#lemmapisample").text("/api/lemma/unique?text=..."); + break + case "3": + $("#lemmapisample").text("/api/lemma/clean?text=..."); + break + } + }); + + $("#wordssearchapisample").text("/api/text/occurences/words?text=...&words=[...,...]"); + $("#wordssearchtype").change(function () { + var usePost = $("#wordssearchusepost").prop("checked"); + switch ($("#wordssearchtype").val()) { + case "1": + if (usePost) + $("#wordssearchapisample").text("/api/text/occurences/words with WordsSearchRequest(string Text, string[] Words) in BODY"); + else + $("#wordssearchapisample").text("/api/text/occurences/words?text=...&words=[...,...]"); + break + case "2": + if (usePost) + 
$("#wordssearchapisample").text("/api/stem/occurences/words with WordsSearchRequest(string Text, string[] Words) in BODY"); + else + $("#wordssearchapisample").text("/api/stem/occurences/words?text=...&words=[...,...]"); + break + case "3": + if (usePost) + $("#wordssearchapisample").text("api/lemma/occurences/words with WordsSearchRequest(string Text, string[] Words) in BODY"); + else + $("#wordssearchapisample").text("api/lemma/occurences/words?text=...&words=[...,...]"); + break + } + }); + + $("#phrasessearchapisample").text("/api/text/occurences/phrases?text=...&words=[...,...]"); + $("#phrasessearchtype").change(function () { + var usePost = $("#phrasessearchusepost").prop("checked"); + switch ($("#phrasessearchtype").val()) { + case "1": + if (usePost) + $("#phrasessearchapisample").text("/api/text/occurences/phrases with WordsSearchRequest(string Text, string[] Words) in BODY"); + else + $("#phrasessearchapisample").text("/api/text/occurences/phrases?text=...&words=[...,...]"); + break + case "2": + if (usePost) + $("#phrasessearchapisample").text("/api/stem/occurences/phrases with WordsSearchRequest(string Text, string[] Words) in BODY"); + else + $("#phrasessearchapisample").text("/api/stem/occurences/phrases?text=...&words=[...,...]"); + break + case "3": + if (usePost) + $("#phrasessearchapisample").text("api/lemma/occurences/phrases with WordsSearchRequest(string Text, string[] Words) in BODY"); + else + $("#phrasessearchapisample").text("api/lemma/occurences/phrases?text=...&words=[...,...]"); + break + } + }); +}); + +/*----------------------------------------*/ +/*------------- MODELS -----------*/ +/*----------------------------------------*/ +function LexToken(entry) { + this.Word = entry.Word; + this.Token = entry.Token; + this.Position = entry.Position; +} + +function LexTokenCollection(entry) { + buf = [] + entry.forEach(function (item) { + buf.push(new LexToken(item)); + }); + this.Items = buf; +} + +function WordsOccurences(data) { + var 
occurences = {}; + for (var key in data) { + occurences[key] = new LexTokenCollection(data[key]); + } + this.Occurences = occurences; +} + +function PhrasesOccurences(data) { + var occurences = {}; + for (var key in data) { + occurences[key] = []; + data[key].forEach(function (entry) { + occurences[key].push(new LexTokenCollection(entry)); + }); + } + this.Occurences = occurences; +} +/*----------------------------------------*/ +/*------------- TEXT TO TOKENS -----------*/ +/*----------------------------------------*/ +function getWords() { + var usePost = $("#wordsusepost").prop("checked"); + var text = $("#primitivetext").val(); + switch ($("#wordsplittype").val()) { + case '1': + requestTokens(usePost, "/api/text/words", text, function (list) { + $("#primitiveout").val(list.map(e=>e.Word).join(", ")); + }, "#wordaddinfo"); + break; + case '2': + requestTokens(usePost, "/api/text/words/unique", text, function (list) { + $("#primitiveout").val(list.map(e=>e.Word).join(", ")); + }, "#wordaddinfo"); + break; + case '3': + requestTokens(usePost, "/api/text/words/clean", text, function (list) { + $("#primitiveout").val(list.map(e=>e.Word).join(", ")); + }, "#wordaddinfo"); + break; + } +} + +function getStems() { + var usePost = $("#stemsusepost").prop("checked"); + var text = $("#stemtext").val(); + switch ($("#stemsplittype").val()) { + case '1': + requestTokens(usePost, "/api/stem", text, function (list) { + $("#stemout").val(list.map(e=>e.Token).join(", ")); + }, "#stemaddinfo"); + break; + case '2': + requestTokens(usePost, "/api/stem/unique", text, function (list) { + $("#stemout").val(list.map(e=>e.Token).join(", ")); + }, "#stemaddinfo"); + break; + case '3': + requestTokens(usePost, "/api/stem/clean", text, function (list) { + $("#stemout").val(list.map(e=>e.Token).join(", ")); + }, "#stemaddinfo"); + break; + } +} + +function getLemms() { + var usePost = $("#lemmausepost").prop("checked"); + var text = $("#lemmatext").val(); + switch ($("#lemmtype").val()) 
{ + case '1': + requestTokens(usePost, "/api/lemma", text, function (list) { + $("#lemmaout").val(list.map(e=>e.Token).join(", ")); + }, "#lemmaddinfo"); + break; + case '2': + requestTokens(usePost, "/api/lemma/unique", text, function (list) { + $("#lemmaout").val(list.map(e=>e.Token).join(", ")); + }, "#lemmaddinfo"); + break; + case '3': + requestTokens(usePost, "/api/lemma/clean", text, function (list) { + $("#lemmaout").val(list.map(e=>e.Token).join(", ")); + }, "#lemmaddinfo"); + break; + } +} +/*----------------------------------------*/ +/*------------- SEARCH IN TEXT -----------*/ +/*----------------------------------------*/ +function searchWordsInText() { + var usePost = $("#wordssearchusepost").prop("checked"); + var text = $("#wordssearchtext").val(); + var words = $("#searchwords").val().split(' '); + switch ($("#wordssearchtype").val()) { + case '1': + requestWordsOccurences(usePost, "api/text/occurences/words", text, words, + function (result) { + var text = []; + for (var key in result.Occurences) { + text.push(key + " => " + result.Occurences[key].Items.map(e => e.Token + " (" + e.Position + ")").join(', ')) + } + $("#wordssearchtextout").val(text.join('\r\n')); + }, "#wordssearchaddinfo"); + break; + case '2': + requestWordsOccurences(usePost, "api/stem/occurences/words", text, words, + function (result) { + var text = []; + for (var key in result.Occurences) { + text.push(key + " => " + result.Occurences[key].Items.map(e => e.Token + " (" + e.Word + " on " + e.Position + ")").join(', ')) + } + $("#wordssearchtextout").val(text.join('\r\n')); + }, "#wordssearchaddinfo"); + break; + case '3': + requestWordsOccurences(usePost, "api/lemma/occurences/words", text, words, + function (result) { + var text = []; + for (var key in result.Occurences) { + text.push(key + " => " + result.Occurences[key].Items.map(e => e.Token + " (" + e.Word + " on " + e.Position + ")").join(', ')) + } + $("#wordssearchtextout").val(text.join('\r\n')); + }, 
"#wordssearchaddinfo"); + break; + } +} + +function searchPhrasesInText() { + var usePost = $("#phrasessearchusepost").prop("checked"); + var text = $("#phrasessearchtext").val(); + var words = $("#searchphrases").val().split(';'); + switch ($("#phrasessearchtype").val()) { + case '1': + requestPhrasesOccurences(usePost, "api/text/occurences/phrases", text, words, + function (result) { + var text = []; + for (var key in result.Occurences) { + result.Occurences[key].forEach(function (set) { + text.push(key + " => " + set.Items.map(e => e.Token + " (" + e.Position + ")").join(', ')) + }); + } + $("#phrasessearchtextout").val(text.join('\r\n')); + }, "#phrasessearchaddinfo"); + break; + case '2': + requestPhrasesOccurences(usePost, "api/stem/occurences/phrases", text, words, + function (result) { + var text = []; + for (var key in result.Occurences) { + result.Occurences[key].forEach(function (set) { + text.push(key + " => " + set.Items.map(e => e.Token + " (" + e.Position + ")").join(', ')) + }); + } + $("#phrasessearchtextout").val(text.join('\r\n')); + }, "#phrasessearchaddinfo"); + break; + case '3': + requestPhrasesOccurences(usePost, "api/lemma/occurences/phrases", text, words, + function (result) { + var text = []; + for (var key in result.Occurences) { + result.Occurences[key].forEach(function (set) { + text.push(key + " => " + set.Items.map(e => e.Token + " (" + e.Position + ")").join(', ')) + }); + } + $("#phrasessearchtextout").val(text.join('\r\n')); + }, "#phrasessearchaddinfo"); + break; + } +} +/*----------------------------------------*/ +/*----------- AJAX FOR TOKENS ------------*/ +/*----------------------------------------*/ +function requestTokens(usePost, resource, data, callback, informer) { + var mapper = function (val) { + var set = []; + val.forEach(function (entry) { + set.push(new LexToken(entry)); + }); + return set; + }; + var payload = { text: data }; + if (usePost) + post(resource, data, mapper, callback, informer); + else + 
get(resource, payload, mapper, callback, informer); +} +/*----------------------------------------*/ +/*------- AJAX FOR WORD OCCURENCES -------*/ +/*----------------------------------------*/ +function requestWordsOccurences(usePost, resource, data, search, callback, informer) { + var mapper = function (val) { return new WordsOccurences(val); }; + var payload = { text: data, words: search }; + if (usePost) + post(resource, payload, mapper, callback, informer); + else + get(resource, payload, mapper, callback, informer); +} +/*----------------------------------------*/ +/*----- AJAX FOR PHRASES OCCURENCES ------*/ +/*----------------------------------------*/ +function requestPhrasesOccurences(usePost, resource, data, search, callback, informer) { + var mapper = function (val) { return new PhrasesOccurences(val); }; + var payload = { text: data, words: search }; + if (usePost) + post(resource, payload, mapper, callback, informer); + else + get(resource, payload, mapper, callback, informer); +} +/*----------------------------------------*/ +/*----- AJAX REQUESTS ------*/ +/*----------------------------------------*/ +function get(resource, payload, mapper, callback, informer) { + var ajaxTime = new Date().getTime(); + $.get(resource, payload, function (val) { + var totalTime = new Date().getTime() - ajaxTime; + $(informer).text(totalTime + " ms"); + callback(mapper(val)); + }); +} + +function post(resource, payload, mapper, callback, informer) { + var ajaxTime = new Date().getTime(); + $.ajax({ + type: "POST", + url: resource, + contentType: "application/json; charset=utf-8", + data: JSON.stringify(payload), + success: function (val) { + var totalTime = new Date().getTime() - ajaxTime; + $(informer).text(totalTime + " ms"); + callback(mapper(val)); + } + }); +} \ No newline at end of file diff --git a/WebSemanticService/semantic/Semantic.API/Web/JS/jquery-3.1.1.min.js b/WebSemanticService/semantic/Semantic.API/Web/JS/jquery-3.1.1.min.js new file mode 100644 index 
0000000..4c5be4c --- /dev/null +++ b/WebSemanticService/semantic/Semantic.API/Web/JS/jquery-3.1.1.min.js @@ -0,0 +1,4 @@ +/*! jQuery v3.1.1 | (c) jQuery Foundation | jquery.org/license */ +!function(a,b){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=a.document?b(a,!0):function(a){if(!a.document)throw new Error("jQuery requires a window with a document");return b(a)}:b(a)}("undefined"!=typeof window?window:this,function(a,b){"use strict";var c=[],d=a.document,e=Object.getPrototypeOf,f=c.slice,g=c.concat,h=c.push,i=c.indexOf,j={},k=j.toString,l=j.hasOwnProperty,m=l.toString,n=m.call(Object),o={};function p(a,b){b=b||d;var c=b.createElement("script");c.text=a,b.head.appendChild(c).parentNode.removeChild(c)}var q="3.1.1",r=function(a,b){return new r.fn.init(a,b)},s=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g,t=/^-ms-/,u=/-([a-z])/g,v=function(a,b){return b.toUpperCase()};r.fn=r.prototype={jquery:q,constructor:r,length:0,toArray:function(){return f.call(this)},get:function(a){return null==a?f.call(this):a<0?this[a+this.length]:this[a]},pushStack:function(a){var b=r.merge(this.constructor(),a);return b.prevObject=this,b},each:function(a){return r.each(this,a)},map:function(a){return this.pushStack(r.map(this,function(b,c){return a.call(b,c,b)}))},slice:function(){return this.pushStack(f.apply(this,arguments))},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},eq:function(a){var b=this.length,c=+a+(a<0?b:0);return this.pushStack(c>=0&&c0&&b-1 in a)}var x=function(a){var b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u="sizzle"+1*new Date,v=a.document,w=0,x=0,y=ha(),z=ha(),A=ha(),B=function(a,b){return a===b&&(l=!0),0},C={}.hasOwnProperty,D=[],E=D.pop,F=D.push,G=D.push,H=D.slice,I=function(a,b){for(var c=0,d=a.length;c+~]|"+K+")"+K+"*"),S=new RegExp("="+K+"*([^\\]'\"]*?)"+K+"*\\]","g"),T=new RegExp(N),U=new RegExp("^"+L+"$"),V={ID:new RegExp("^#("+L+")"),CLASS:new RegExp("^\\.("+L+")"),TAG:new RegExp("^("+L+"|[*])"),ATTR:new 
RegExp("^"+M),PSEUDO:new RegExp("^"+N),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+K+"*(even|odd|(([+-]|)(\\d*)n|)"+K+"*(?:([+-]|)"+K+"*(\\d+)|))"+K+"*\\)|)","i"),bool:new RegExp("^(?:"+J+")$","i"),needsContext:new RegExp("^"+K+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+K+"*((?:-\\d)?\\d*)"+K+"*\\)|)(?=[^-]|$)","i")},W=/^(?:input|select|textarea|button)$/i,X=/^h\d$/i,Y=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,$=/[+~]/,_=new RegExp("\\\\([\\da-f]{1,6}"+K+"?|("+K+")|.)","ig"),aa=function(a,b,c){var d="0x"+b-65536;return d!==d||c?b:d<0?String.fromCharCode(d+65536):String.fromCharCode(d>>10|55296,1023&d|56320)},ba=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ca=function(a,b){return b?"\0"===a?"\ufffd":a.slice(0,-1)+"\\"+a.charCodeAt(a.length-1).toString(16)+" ":"\\"+a},da=function(){m()},ea=ta(function(a){return a.disabled===!0&&("form"in a||"label"in a)},{dir:"parentNode",next:"legend"});try{G.apply(D=H.call(v.childNodes),v.childNodes),D[v.childNodes.length].nodeType}catch(fa){G={apply:D.length?function(a,b){F.apply(a,H.call(b))}:function(a,b){var c=a.length,d=0;while(a[c++]=b[d++]);a.length=c-1}}}function ga(a,b,d,e){var f,h,j,k,l,o,r,s=b&&b.ownerDocument,w=b?b.nodeType:9;if(d=d||[],"string"!=typeof a||!a||1!==w&&9!==w&&11!==w)return d;if(!e&&((b?b.ownerDocument||b:v)!==n&&m(b),b=b||n,p)){if(11!==w&&(l=Z.exec(a)))if(f=l[1]){if(9===w){if(!(j=b.getElementById(f)))return d;if(j.id===f)return d.push(j),d}else if(s&&(j=s.getElementById(f))&&t(b,j)&&j.id===f)return d.push(j),d}else{if(l[2])return G.apply(d,b.getElementsByTagName(a)),d;if((f=l[3])&&c.getElementsByClassName&&b.getElementsByClassName)return G.apply(d,b.getElementsByClassName(f)),d}if(c.qsa&&!A[a+" "]&&(!q||!q.test(a))){if(1!==w)s=b,r=a;else if("object"!==b.nodeName.toLowerCase()){(k=b.getAttribute("id"))?k=k.replace(ba,ca):b.setAttribute("id",k=u),o=g(a),h=o.length;while(h--)o[h]="#"+k+" 
"+sa(o[h]);r=o.join(","),s=$.test(a)&&qa(b.parentNode)||b}if(r)try{return G.apply(d,s.querySelectorAll(r)),d}catch(x){}finally{k===u&&b.removeAttribute("id")}}}return i(a.replace(P,"$1"),b,d,e)}function ha(){var a=[];function b(c,e){return a.push(c+" ")>d.cacheLength&&delete b[a.shift()],b[c+" "]=e}return b}function ia(a){return a[u]=!0,a}function ja(a){var b=n.createElement("fieldset");try{return!!a(b)}catch(c){return!1}finally{b.parentNode&&b.parentNode.removeChild(b),b=null}}function ka(a,b){var c=a.split("|"),e=c.length;while(e--)d.attrHandle[c[e]]=b}function la(a,b){var c=b&&a,d=c&&1===a.nodeType&&1===b.nodeType&&a.sourceIndex-b.sourceIndex;if(d)return d;if(c)while(c=c.nextSibling)if(c===b)return-1;return a?1:-1}function ma(a){return function(b){var c=b.nodeName.toLowerCase();return"input"===c&&b.type===a}}function na(a){return function(b){var c=b.nodeName.toLowerCase();return("input"===c||"button"===c)&&b.type===a}}function oa(a){return function(b){return"form"in b?b.parentNode&&b.disabled===!1?"label"in b?"label"in b.parentNode?b.parentNode.disabled===a:b.disabled===a:b.isDisabled===a||b.isDisabled!==!a&&ea(b)===a:b.disabled===a:"label"in b&&b.disabled===a}}function pa(a){return ia(function(b){return b=+b,ia(function(c,d){var e,f=a([],c.length,b),g=f.length;while(g--)c[e=f[g]]&&(c[e]=!(d[e]=c[e]))})})}function qa(a){return a&&"undefined"!=typeof a.getElementsByTagName&&a}c=ga.support={},f=ga.isXML=function(a){var b=a&&(a.ownerDocument||a).documentElement;return!!b&&"HTML"!==b.nodeName},m=ga.setDocument=function(a){var b,e,g=a?a.ownerDocument||a:v;return g!==n&&9===g.nodeType&&g.documentElement?(n=g,o=n.documentElement,p=!f(n),v!==n&&(e=n.defaultView)&&e.top!==e&&(e.addEventListener?e.addEventListener("unload",da,!1):e.attachEvent&&e.attachEvent("onunload",da)),c.attributes=ja(function(a){return a.className="i",!a.getAttribute("className")}),c.getElementsByTagName=ja(function(a){return 
a.appendChild(n.createComment("")),!a.getElementsByTagName("*").length}),c.getElementsByClassName=Y.test(n.getElementsByClassName),c.getById=ja(function(a){return o.appendChild(a).id=u,!n.getElementsByName||!n.getElementsByName(u).length}),c.getById?(d.filter.ID=function(a){var b=a.replace(_,aa);return function(a){return a.getAttribute("id")===b}},d.find.ID=function(a,b){if("undefined"!=typeof b.getElementById&&p){var c=b.getElementById(a);return c?[c]:[]}}):(d.filter.ID=function(a){var b=a.replace(_,aa);return function(a){var c="undefined"!=typeof a.getAttributeNode&&a.getAttributeNode("id");return c&&c.value===b}},d.find.ID=function(a,b){if("undefined"!=typeof b.getElementById&&p){var c,d,e,f=b.getElementById(a);if(f){if(c=f.getAttributeNode("id"),c&&c.value===a)return[f];e=b.getElementsByName(a),d=0;while(f=e[d++])if(c=f.getAttributeNode("id"),c&&c.value===a)return[f]}return[]}}),d.find.TAG=c.getElementsByTagName?function(a,b){return"undefined"!=typeof b.getElementsByTagName?b.getElementsByTagName(a):c.qsa?b.querySelectorAll(a):void 0}:function(a,b){var c,d=[],e=0,f=b.getElementsByTagName(a);if("*"===a){while(c=f[e++])1===c.nodeType&&d.push(c);return d}return f},d.find.CLASS=c.getElementsByClassName&&function(a,b){if("undefined"!=typeof b.getElementsByClassName&&p)return b.getElementsByClassName(a)},r=[],q=[],(c.qsa=Y.test(n.querySelectorAll))&&(ja(function(a){o.appendChild(a).innerHTML="",a.querySelectorAll("[msallowcapture^='']").length&&q.push("[*^$]="+K+"*(?:''|\"\")"),a.querySelectorAll("[selected]").length||q.push("\\["+K+"*(?:value|"+J+")"),a.querySelectorAll("[id~="+u+"-]").length||q.push("~="),a.querySelectorAll(":checked").length||q.push(":checked"),a.querySelectorAll("a#"+u+"+*").length||q.push(".#.+[+~]")}),ja(function(a){a.innerHTML="";var 
b=n.createElement("input");b.setAttribute("type","hidden"),a.appendChild(b).setAttribute("name","D"),a.querySelectorAll("[name=d]").length&&q.push("name"+K+"*[*^$|!~]?="),2!==a.querySelectorAll(":enabled").length&&q.push(":enabled",":disabled"),o.appendChild(a).disabled=!0,2!==a.querySelectorAll(":disabled").length&&q.push(":enabled",":disabled"),a.querySelectorAll("*,:x"),q.push(",.*:")})),(c.matchesSelector=Y.test(s=o.matches||o.webkitMatchesSelector||o.mozMatchesSelector||o.oMatchesSelector||o.msMatchesSelector))&&ja(function(a){c.disconnectedMatch=s.call(a,"*"),s.call(a,"[s!='']:x"),r.push("!=",N)}),q=q.length&&new RegExp(q.join("|")),r=r.length&&new RegExp(r.join("|")),b=Y.test(o.compareDocumentPosition),t=b||Y.test(o.contains)?function(a,b){var c=9===a.nodeType?a.documentElement:a,d=b&&b.parentNode;return a===d||!(!d||1!==d.nodeType||!(c.contains?c.contains(d):a.compareDocumentPosition&&16&a.compareDocumentPosition(d)))}:function(a,b){if(b)while(b=b.parentNode)if(b===a)return!0;return!1},B=b?function(a,b){if(a===b)return l=!0,0;var d=!a.compareDocumentPosition-!b.compareDocumentPosition;return d?d:(d=(a.ownerDocument||a)===(b.ownerDocument||b)?a.compareDocumentPosition(b):1,1&d||!c.sortDetached&&b.compareDocumentPosition(a)===d?a===n||a.ownerDocument===v&&t(v,a)?-1:b===n||b.ownerDocument===v&&t(v,b)?1:k?I(k,a)-I(k,b):0:4&d?-1:1)}:function(a,b){if(a===b)return l=!0,0;var c,d=0,e=a.parentNode,f=b.parentNode,g=[a],h=[b];if(!e||!f)return a===n?-1:b===n?1:e?-1:f?1:k?I(k,a)-I(k,b):0;if(e===f)return la(a,b);c=a;while(c=c.parentNode)g.unshift(c);c=b;while(c=c.parentNode)h.unshift(c);while(g[d]===h[d])d++;return d?la(g[d],h[d]):g[d]===v?-1:h[d]===v?1:0},n):n},ga.matches=function(a,b){return ga(a,null,null,b)},ga.matchesSelector=function(a,b){if((a.ownerDocument||a)!==n&&m(a),b=b.replace(S,"='$1']"),c.matchesSelector&&p&&!A[b+" "]&&(!r||!r.test(b))&&(!q||!q.test(b)))try{var d=s.call(a,b);if(d||c.disconnectedMatch||a.document&&11!==a.document.nodeType)return 
d}catch(e){}return ga(b,n,null,[a]).length>0},ga.contains=function(a,b){return(a.ownerDocument||a)!==n&&m(a),t(a,b)},ga.attr=function(a,b){(a.ownerDocument||a)!==n&&m(a);var e=d.attrHandle[b.toLowerCase()],f=e&&C.call(d.attrHandle,b.toLowerCase())?e(a,b,!p):void 0;return void 0!==f?f:c.attributes||!p?a.getAttribute(b):(f=a.getAttributeNode(b))&&f.specified?f.value:null},ga.escape=function(a){return(a+"").replace(ba,ca)},ga.error=function(a){throw new Error("Syntax error, unrecognized expression: "+a)},ga.uniqueSort=function(a){var b,d=[],e=0,f=0;if(l=!c.detectDuplicates,k=!c.sortStable&&a.slice(0),a.sort(B),l){while(b=a[f++])b===a[f]&&(e=d.push(f));while(e--)a.splice(d[e],1)}return k=null,a},e=ga.getText=function(a){var b,c="",d=0,f=a.nodeType;if(f){if(1===f||9===f||11===f){if("string"==typeof a.textContent)return a.textContent;for(a=a.firstChild;a;a=a.nextSibling)c+=e(a)}else if(3===f||4===f)return a.nodeValue}else while(b=a[d++])c+=e(b);return c},d=ga.selectors={cacheLength:50,createPseudo:ia,match:V,attrHandle:{},find:{},relative:{">":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(a){return a[1]=a[1].replace(_,aa),a[3]=(a[3]||a[4]||a[5]||"").replace(_,aa),"~="===a[2]&&(a[3]=" "+a[3]+" "),a.slice(0,4)},CHILD:function(a){return a[1]=a[1].toLowerCase(),"nth"===a[1].slice(0,3)?(a[3]||ga.error(a[0]),a[4]=+(a[4]?a[5]+(a[6]||1):2*("even"===a[3]||"odd"===a[3])),a[5]=+(a[7]+a[8]||"odd"===a[3])):a[3]&&ga.error(a[0]),a},PSEUDO:function(a){var b,c=!a[6]&&a[2];return V.CHILD.test(a[0])?null:(a[3]?a[2]=a[4]||a[5]||"":c&&T.test(c)&&(b=g(c,!0))&&(b=c.indexOf(")",c.length-b)-c.length)&&(a[0]=a[0].slice(0,b),a[2]=c.slice(0,b)),a.slice(0,3))}},filter:{TAG:function(a){var b=a.replace(_,aa).toLowerCase();return"*"===a?function(){return!0}:function(a){return a.nodeName&&a.nodeName.toLowerCase()===b}},CLASS:function(a){var b=y[a+" "];return b||(b=new 
RegExp("(^|"+K+")"+a+"("+K+"|$)"))&&y(a,function(a){return b.test("string"==typeof a.className&&a.className||"undefined"!=typeof a.getAttribute&&a.getAttribute("class")||"")})},ATTR:function(a,b,c){return function(d){var e=ga.attr(d,a);return null==e?"!="===b:!b||(e+="","="===b?e===c:"!="===b?e!==c:"^="===b?c&&0===e.indexOf(c):"*="===b?c&&e.indexOf(c)>-1:"$="===b?c&&e.slice(-c.length)===c:"~="===b?(" "+e.replace(O," ")+" ").indexOf(c)>-1:"|="===b&&(e===c||e.slice(0,c.length+1)===c+"-"))}},CHILD:function(a,b,c,d,e){var f="nth"!==a.slice(0,3),g="last"!==a.slice(-4),h="of-type"===b;return 1===d&&0===e?function(a){return!!a.parentNode}:function(b,c,i){var j,k,l,m,n,o,p=f!==g?"nextSibling":"previousSibling",q=b.parentNode,r=h&&b.nodeName.toLowerCase(),s=!i&&!h,t=!1;if(q){if(f){while(p){m=b;while(m=m[p])if(h?m.nodeName.toLowerCase()===r:1===m.nodeType)return!1;o=p="only"===a&&!o&&"nextSibling"}return!0}if(o=[g?q.firstChild:q.lastChild],g&&s){m=q,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n&&j[2],m=n&&q.childNodes[n];while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if(1===m.nodeType&&++t&&m===b){k[a]=[w,n,t];break}}else if(s&&(m=b,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n),t===!1)while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if((h?m.nodeName.toLowerCase()===r:1===m.nodeType)&&++t&&(s&&(l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),k[a]=[w,t]),m===b))break;return t-=e,t===d||t%d===0&&t/d>=0}}},PSEUDO:function(a,b){var c,e=d.pseudos[a]||d.setFilters[a.toLowerCase()]||ga.error("unsupported pseudo: "+a);return e[u]?e(b):e.length>1?(c=[a,a,"",b],d.setFilters.hasOwnProperty(a.toLowerCase())?ia(function(a,c){var d,f=e(a,b),g=f.length;while(g--)d=I(a,f[g]),a[d]=!(c[d]=f[g])}):function(a){return e(a,0,c)}):e}},pseudos:{not:ia(function(a){var b=[],c=[],d=h(a.replace(P,"$1"));return d[u]?ia(function(a,b,c,e){var f,g=d(a,null,e,[]),h=a.length;while(h--)(f=g[h])&&(a[h]=!(b[h]=f))}):function(a,e,f){return 
b[0]=a,d(b,null,f,c),b[0]=null,!c.pop()}}),has:ia(function(a){return function(b){return ga(a,b).length>0}}),contains:ia(function(a){return a=a.replace(_,aa),function(b){return(b.textContent||b.innerText||e(b)).indexOf(a)>-1}}),lang:ia(function(a){return U.test(a||"")||ga.error("unsupported lang: "+a),a=a.replace(_,aa).toLowerCase(),function(b){var c;do if(c=p?b.lang:b.getAttribute("xml:lang")||b.getAttribute("lang"))return c=c.toLowerCase(),c===a||0===c.indexOf(a+"-");while((b=b.parentNode)&&1===b.nodeType);return!1}}),target:function(b){var c=a.location&&a.location.hash;return c&&c.slice(1)===b.id},root:function(a){return a===o},focus:function(a){return a===n.activeElement&&(!n.hasFocus||n.hasFocus())&&!!(a.type||a.href||~a.tabIndex)},enabled:oa(!1),disabled:oa(!0),checked:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&!!a.checked||"option"===b&&!!a.selected},selected:function(a){return a.parentNode&&a.parentNode.selectedIndex,a.selected===!0},empty:function(a){for(a=a.firstChild;a;a=a.nextSibling)if(a.nodeType<6)return!1;return!0},parent:function(a){return!d.pseudos.empty(a)},header:function(a){return X.test(a.nodeName)},input:function(a){return W.test(a.nodeName)},button:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&"button"===a.type||"button"===b},text:function(a){var b;return"input"===a.nodeName.toLowerCase()&&"text"===a.type&&(null==(b=a.getAttribute("type"))||"text"===b.toLowerCase())},first:pa(function(){return[0]}),last:pa(function(a,b){return[b-1]}),eq:pa(function(a,b,c){return[c<0?c+b:c]}),even:pa(function(a,b){for(var c=0;c=0;)a.push(d);return a}),gt:pa(function(a,b,c){for(var d=c<0?c+b:c;++d1?function(b,c,d){var e=a.length;while(e--)if(!a[e](b,c,d))return!1;return!0}:a[0]}function va(a,b,c){for(var d=0,e=b.length;d-1&&(f[j]=!(g[j]=l))}}else r=wa(r===g?r.splice(o,r.length):r),e?e(null,g,r,i):G.apply(g,r)})}function ya(a){for(var b,c,e,f=a.length,g=d.relative[a[0].type],h=g||d.relative[" "],i=g?1:0,k=ta(function(a){return 
a===b},h,!0),l=ta(function(a){return I(b,a)>-1},h,!0),m=[function(a,c,d){var e=!g&&(d||c!==j)||((b=c).nodeType?k(a,c,d):l(a,c,d));return b=null,e}];i1&&ua(m),i>1&&sa(a.slice(0,i-1).concat({value:" "===a[i-2].type?"*":""})).replace(P,"$1"),c,i0,e=a.length>0,f=function(f,g,h,i,k){var l,o,q,r=0,s="0",t=f&&[],u=[],v=j,x=f||e&&d.find.TAG("*",k),y=w+=null==v?1:Math.random()||.1,z=x.length;for(k&&(j=g===n||g||k);s!==z&&null!=(l=x[s]);s++){if(e&&l){o=0,g||l.ownerDocument===n||(m(l),h=!p);while(q=a[o++])if(q(l,g||n,h)){i.push(l);break}k&&(w=y)}c&&((l=!q&&l)&&r--,f&&t.push(l))}if(r+=s,c&&s!==r){o=0;while(q=b[o++])q(t,u,g,h);if(f){if(r>0)while(s--)t[s]||u[s]||(u[s]=E.call(i));u=wa(u)}G.apply(i,u),k&&!f&&u.length>0&&r+b.length>1&&ga.uniqueSort(i)}return k&&(w=y,j=v),t};return c?ia(f):f}return h=ga.compile=function(a,b){var c,d=[],e=[],f=A[a+" "];if(!f){b||(b=g(a)),c=b.length;while(c--)f=ya(b[c]),f[u]?d.push(f):e.push(f);f=A(a,za(e,d)),f.selector=a}return f},i=ga.select=function(a,b,c,e){var f,i,j,k,l,m="function"==typeof a&&a,n=!e&&g(a=m.selector||a);if(c=c||[],1===n.length){if(i=n[0]=n[0].slice(0),i.length>2&&"ID"===(j=i[0]).type&&9===b.nodeType&&p&&d.relative[i[1].type]){if(b=(d.find.ID(j.matches[0].replace(_,aa),b)||[])[0],!b)return c;m&&(b=b.parentNode),a=a.slice(i.shift().value.length)}f=V.needsContext.test(a)?0:i.length;while(f--){if(j=i[f],d.relative[k=j.type])break;if((l=d.find[k])&&(e=l(j.matches[0].replace(_,aa),$.test(i[0].type)&&qa(b.parentNode)||b))){if(i.splice(f,1),a=e.length&&sa(i),!a)return G.apply(c,e),c;break}}}return(m||h(a,n))(e,b,!p,c,!b||$.test(a)&&qa(b.parentNode)||b),c},c.sortStable=u.split("").sort(B).join("")===u,c.detectDuplicates=!!l,m(),c.sortDetached=ja(function(a){return 1&a.compareDocumentPosition(n.createElement("fieldset"))}),ja(function(a){return a.innerHTML="","#"===a.firstChild.getAttribute("href")})||ka("type|href|height|width",function(a,b,c){if(!c)return 
a.getAttribute(b,"type"===b.toLowerCase()?1:2)}),c.attributes&&ja(function(a){return a.innerHTML="",a.firstChild.setAttribute("value",""),""===a.firstChild.getAttribute("value")})||ka("value",function(a,b,c){if(!c&&"input"===a.nodeName.toLowerCase())return a.defaultValue}),ja(function(a){return null==a.getAttribute("disabled")})||ka(J,function(a,b,c){var d;if(!c)return a[b]===!0?b.toLowerCase():(d=a.getAttributeNode(b))&&d.specified?d.value:null}),ga}(a);r.find=x,r.expr=x.selectors,r.expr[":"]=r.expr.pseudos,r.uniqueSort=r.unique=x.uniqueSort,r.text=x.getText,r.isXMLDoc=x.isXML,r.contains=x.contains,r.escapeSelector=x.escape;var y=function(a,b,c){var d=[],e=void 0!==c;while((a=a[b])&&9!==a.nodeType)if(1===a.nodeType){if(e&&r(a).is(c))break;d.push(a)}return d},z=function(a,b){for(var c=[];a;a=a.nextSibling)1===a.nodeType&&a!==b&&c.push(a);return c},A=r.expr.match.needsContext,B=/^<([a-z][^\/\0>:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i,C=/^.[^:#\[\.,]*$/;function D(a,b,c){return r.isFunction(b)?r.grep(a,function(a,d){return!!b.call(a,d,a)!==c}):b.nodeType?r.grep(a,function(a){return a===b!==c}):"string"!=typeof b?r.grep(a,function(a){return i.call(b,a)>-1!==c}):C.test(b)?r.filter(b,a,c):(b=r.filter(b,a),r.grep(a,function(a){return i.call(b,a)>-1!==c&&1===a.nodeType}))}r.filter=function(a,b,c){var d=b[0];return c&&(a=":not("+a+")"),1===b.length&&1===d.nodeType?r.find.matchesSelector(d,a)?[d]:[]:r.find.matches(a,r.grep(b,function(a){return 1===a.nodeType}))},r.fn.extend({find:function(a){var b,c,d=this.length,e=this;if("string"!=typeof a)return this.pushStack(r(a).filter(function(){for(b=0;b1?r.uniqueSort(c):c},filter:function(a){return this.pushStack(D(this,a||[],!1))},not:function(a){return this.pushStack(D(this,a||[],!0))},is:function(a){return!!D(this,"string"==typeof a&&A.test(a)?r(a):a||[],!1).length}});var E,F=/^(?:\s*(<[\w\W]+>)[^>]*|#([\w-]+))$/,G=r.fn.init=function(a,b,c){var e,f;if(!a)return this;if(c=c||E,"string"==typeof 
a){if(e="<"===a[0]&&">"===a[a.length-1]&&a.length>=3?[null,a,null]:F.exec(a),!e||!e[1]&&b)return!b||b.jquery?(b||c).find(a):this.constructor(b).find(a);if(e[1]){if(b=b instanceof r?b[0]:b,r.merge(this,r.parseHTML(e[1],b&&b.nodeType?b.ownerDocument||b:d,!0)),B.test(e[1])&&r.isPlainObject(b))for(e in b)r.isFunction(this[e])?this[e](b[e]):this.attr(e,b[e]);return this}return f=d.getElementById(e[2]),f&&(this[0]=f,this.length=1),this}return a.nodeType?(this[0]=a,this.length=1,this):r.isFunction(a)?void 0!==c.ready?c.ready(a):a(r):r.makeArray(a,this)};G.prototype=r.fn,E=r(d);var H=/^(?:parents|prev(?:Until|All))/,I={children:!0,contents:!0,next:!0,prev:!0};r.fn.extend({has:function(a){var b=r(a,this),c=b.length;return this.filter(function(){for(var a=0;a-1:1===c.nodeType&&r.find.matchesSelector(c,a))){f.push(c);break}return this.pushStack(f.length>1?r.uniqueSort(f):f)},index:function(a){return a?"string"==typeof a?i.call(r(a),this[0]):i.call(this,a.jquery?a[0]:a):this[0]&&this[0].parentNode?this.first().prevAll().length:-1},add:function(a,b){return this.pushStack(r.uniqueSort(r.merge(this.get(),r(a,b))))},addBack:function(a){return this.add(null==a?this.prevObject:this.prevObject.filter(a))}});function J(a,b){while((a=a[b])&&1!==a.nodeType);return a}r.each({parent:function(a){var b=a.parentNode;return b&&11!==b.nodeType?b:null},parents:function(a){return y(a,"parentNode")},parentsUntil:function(a,b,c){return y(a,"parentNode",c)},next:function(a){return J(a,"nextSibling")},prev:function(a){return J(a,"previousSibling")},nextAll:function(a){return y(a,"nextSibling")},prevAll:function(a){return y(a,"previousSibling")},nextUntil:function(a,b,c){return y(a,"nextSibling",c)},prevUntil:function(a,b,c){return y(a,"previousSibling",c)},siblings:function(a){return z((a.parentNode||{}).firstChild,a)},children:function(a){return z(a.firstChild)},contents:function(a){return a.contentDocument||r.merge([],a.childNodes)}},function(a,b){r.fn[a]=function(c,d){var 
e=r.map(this,b,c);return"Until"!==a.slice(-5)&&(d=c),d&&"string"==typeof d&&(e=r.filter(d,e)),this.length>1&&(I[a]||r.uniqueSort(e),H.test(a)&&e.reverse()),this.pushStack(e)}});var K=/[^\x20\t\r\n\f]+/g;function L(a){var b={};return r.each(a.match(K)||[],function(a,c){b[c]=!0}),b}r.Callbacks=function(a){a="string"==typeof a?L(a):r.extend({},a);var b,c,d,e,f=[],g=[],h=-1,i=function(){for(e=a.once,d=b=!0;g.length;h=-1){c=g.shift();while(++h-1)f.splice(c,1),c<=h&&h--}),this},has:function(a){return a?r.inArray(a,f)>-1:f.length>0},empty:function(){return f&&(f=[]),this},disable:function(){return e=g=[],f=c="",this},disabled:function(){return!f},lock:function(){return e=g=[],c||b||(f=c=""),this},locked:function(){return!!e},fireWith:function(a,c){return e||(c=c||[],c=[a,c.slice?c.slice():c],g.push(c),b||i()),this},fire:function(){return j.fireWith(this,arguments),this},fired:function(){return!!d}};return j};function M(a){return a}function N(a){throw a}function O(a,b,c){var d;try{a&&r.isFunction(d=a.promise)?d.call(a).done(b).fail(c):a&&r.isFunction(d=a.then)?d.call(a,b,c):b.call(void 0,a)}catch(a){c.call(void 0,a)}}r.extend({Deferred:function(b){var c=[["notify","progress",r.Callbacks("memory"),r.Callbacks("memory"),2],["resolve","done",r.Callbacks("once memory"),r.Callbacks("once memory"),0,"resolved"],["reject","fail",r.Callbacks("once memory"),r.Callbacks("once memory"),1,"rejected"]],d="pending",e={state:function(){return d},always:function(){return f.done(arguments).fail(arguments),this},"catch":function(a){return e.then(null,a)},pipe:function(){var a=arguments;return r.Deferred(function(b){r.each(c,function(c,d){var e=r.isFunction(a[d[4]])&&a[d[4]];f[d[1]](function(){var a=e&&e.apply(this,arguments);a&&r.isFunction(a.promise)?a.promise().progress(b.notify).done(b.resolve).fail(b.reject):b[d[0]+"With"](this,e?[a]:arguments)})}),a=null}).promise()},then:function(b,d,e){var f=0;function g(b,c,d,e){return function(){var h=this,i=arguments,j=function(){var 
a,j;if(!(b=f&&(d!==N&&(h=void 0,i=[a]),c.rejectWith(h,i))}};b?k():(r.Deferred.getStackHook&&(k.stackTrace=r.Deferred.getStackHook()),a.setTimeout(k))}}return r.Deferred(function(a){c[0][3].add(g(0,a,r.isFunction(e)?e:M,a.notifyWith)),c[1][3].add(g(0,a,r.isFunction(b)?b:M)),c[2][3].add(g(0,a,r.isFunction(d)?d:N))}).promise()},promise:function(a){return null!=a?r.extend(a,e):e}},f={};return r.each(c,function(a,b){var g=b[2],h=b[5];e[b[1]]=g.add,h&&g.add(function(){d=h},c[3-a][2].disable,c[0][2].lock),g.add(b[3].fire),f[b[0]]=function(){return f[b[0]+"With"](this===f?void 0:this,arguments),this},f[b[0]+"With"]=g.fireWith}),e.promise(f),b&&b.call(f,f),f},when:function(a){var b=arguments.length,c=b,d=Array(c),e=f.call(arguments),g=r.Deferred(),h=function(a){return function(c){d[a]=this,e[a]=arguments.length>1?f.call(arguments):c,--b||g.resolveWith(d,e)}};if(b<=1&&(O(a,g.done(h(c)).resolve,g.reject),"pending"===g.state()||r.isFunction(e[c]&&e[c].then)))return g.then();while(c--)O(e[c],h(c),g.reject);return g.promise()}});var P=/^(Eval|Internal|Range|Reference|Syntax|Type|URI)Error$/;r.Deferred.exceptionHook=function(b,c){a.console&&a.console.warn&&b&&P.test(b.name)&&a.console.warn("jQuery.Deferred exception: "+b.message,b.stack,c)},r.readyException=function(b){a.setTimeout(function(){throw b})};var Q=r.Deferred();r.fn.ready=function(a){return Q.then(a)["catch"](function(a){r.readyException(a)}),this},r.extend({isReady:!1,readyWait:1,holdReady:function(a){a?r.readyWait++:r.ready(!0)},ready:function(a){(a===!0?--r.readyWait:r.isReady)||(r.isReady=!0,a!==!0&&--r.readyWait>0||Q.resolveWith(d,[r]))}}),r.ready.then=Q.then;function R(){d.removeEventListener("DOMContentLoaded",R), +a.removeEventListener("load",R),r.ready()}"complete"===d.readyState||"loading"!==d.readyState&&!d.documentElement.doScroll?a.setTimeout(r.ready):(d.addEventListener("DOMContentLoaded",R),a.addEventListener("load",R));var S=function(a,b,c,d,e,f,g){var 
h=0,i=a.length,j=null==c;if("object"===r.type(c)){e=!0;for(h in c)S(a,b,h,c[h],!0,f,g)}else if(void 0!==d&&(e=!0,r.isFunction(d)||(g=!0),j&&(g?(b.call(a,d),b=null):(j=b,b=function(a,b,c){return j.call(r(a),c)})),b))for(;h1,null,!0)},removeData:function(a){return this.each(function(){W.remove(this,a)})}}),r.extend({queue:function(a,b,c){var d;if(a)return b=(b||"fx")+"queue",d=V.get(a,b),c&&(!d||r.isArray(c)?d=V.access(a,b,r.makeArray(c)):d.push(c)),d||[]},dequeue:function(a,b){b=b||"fx";var c=r.queue(a,b),d=c.length,e=c.shift(),f=r._queueHooks(a,b),g=function(){r.dequeue(a,b)};"inprogress"===e&&(e=c.shift(),d--),e&&("fx"===b&&c.unshift("inprogress"),delete f.stop,e.call(a,g,f)),!d&&f&&f.empty.fire()},_queueHooks:function(a,b){var c=b+"queueHooks";return V.get(a,c)||V.access(a,c,{empty:r.Callbacks("once memory").add(function(){V.remove(a,[b+"queue",c])})})}}),r.fn.extend({queue:function(a,b){var c=2;return"string"!=typeof a&&(b=a,a="fx",c--),arguments.length\x20\t\r\n\f]+)/i,ka=/^$|\/(?:java|ecma)script/i,la={option:[1,""],thead:[1,"","
"],col:[2,"","
"],tr:[2,"","
"],td:[3,"","
"],_default:[0,"",""]};la.optgroup=la.option,la.tbody=la.tfoot=la.colgroup=la.caption=la.thead,la.th=la.td;function ma(a,b){var c;return c="undefined"!=typeof a.getElementsByTagName?a.getElementsByTagName(b||"*"):"undefined"!=typeof a.querySelectorAll?a.querySelectorAll(b||"*"):[],void 0===b||b&&r.nodeName(a,b)?r.merge([a],c):c}function na(a,b){for(var c=0,d=a.length;c-1)e&&e.push(f);else if(j=r.contains(f.ownerDocument,f),g=ma(l.appendChild(f),"script"),j&&na(g),c){k=0;while(f=g[k++])ka.test(f.type||"")&&c.push(f)}return l}!function(){var a=d.createDocumentFragment(),b=a.appendChild(d.createElement("div")),c=d.createElement("input");c.setAttribute("type","radio"),c.setAttribute("checked","checked"),c.setAttribute("name","t"),b.appendChild(c),o.checkClone=b.cloneNode(!0).cloneNode(!0).lastChild.checked,b.innerHTML="",o.noCloneChecked=!!b.cloneNode(!0).lastChild.defaultValue}();var qa=d.documentElement,ra=/^key/,sa=/^(?:mouse|pointer|contextmenu|drag|drop)|click/,ta=/^([^.]*)(?:\.(.+)|)/;function ua(){return!0}function va(){return!1}function wa(){try{return d.activeElement}catch(a){}}function xa(a,b,c,d,e,f){var g,h;if("object"==typeof b){"string"!=typeof c&&(d=d||c,c=void 0);for(h in b)xa(a,h,c,d,b[h],f);return a}if(null==d&&null==e?(e=c,d=c=void 0):null==e&&("string"==typeof c?(e=d,d=void 0):(e=d,d=c,c=void 0)),e===!1)e=va;else if(!e)return a;return 1===f&&(g=e,e=function(a){return r().off(a),g.apply(this,arguments)},e.guid=g.guid||(g.guid=r.guid++)),a.each(function(){r.event.add(this,b,e,d,c)})}r.event={global:{},add:function(a,b,c,d,e){var f,g,h,i,j,k,l,m,n,o,p,q=V.get(a);if(q){c.handler&&(f=c,c=f.handler,e=f.selector),e&&r.find.matchesSelector(qa,e),c.guid||(c.guid=r.guid++),(i=q.events)||(i=q.events={}),(g=q.handle)||(g=q.handle=function(b){return"undefined"!=typeof r&&r.event.triggered!==b.type?r.event.dispatch.apply(a,arguments):void 
0}),b=(b||"").match(K)||[""],j=b.length;while(j--)h=ta.exec(b[j])||[],n=p=h[1],o=(h[2]||"").split(".").sort(),n&&(l=r.event.special[n]||{},n=(e?l.delegateType:l.bindType)||n,l=r.event.special[n]||{},k=r.extend({type:n,origType:p,data:d,handler:c,guid:c.guid,selector:e,needsContext:e&&r.expr.match.needsContext.test(e),namespace:o.join(".")},f),(m=i[n])||(m=i[n]=[],m.delegateCount=0,l.setup&&l.setup.call(a,d,o,g)!==!1||a.addEventListener&&a.addEventListener(n,g)),l.add&&(l.add.call(a,k),k.handler.guid||(k.handler.guid=c.guid)),e?m.splice(m.delegateCount++,0,k):m.push(k),r.event.global[n]=!0)}},remove:function(a,b,c,d,e){var f,g,h,i,j,k,l,m,n,o,p,q=V.hasData(a)&&V.get(a);if(q&&(i=q.events)){b=(b||"").match(K)||[""],j=b.length;while(j--)if(h=ta.exec(b[j])||[],n=p=h[1],o=(h[2]||"").split(".").sort(),n){l=r.event.special[n]||{},n=(d?l.delegateType:l.bindType)||n,m=i[n]||[],h=h[2]&&new RegExp("(^|\\.)"+o.join("\\.(?:.*\\.|)")+"(\\.|$)"),g=f=m.length;while(f--)k=m[f],!e&&p!==k.origType||c&&c.guid!==k.guid||h&&!h.test(k.namespace)||d&&d!==k.selector&&("**"!==d||!k.selector)||(m.splice(f,1),k.selector&&m.delegateCount--,l.remove&&l.remove.call(a,k));g&&!m.length&&(l.teardown&&l.teardown.call(a,o,q.handle)!==!1||r.removeEvent(a,n,q.handle),delete i[n])}else for(n in i)r.event.remove(a,n+b[j],c,d,!0);r.isEmptyObject(i)&&V.remove(a,"handle events")}},dispatch:function(a){var b=r.event.fix(a),c,d,e,f,g,h,i=new Array(arguments.length),j=(V.get(this,"events")||{})[b.type]||[],k=r.event.special[b.type]||{};for(i[0]=b,c=1;c=1))for(;j!==this;j=j.parentNode||this)if(1===j.nodeType&&("click"!==a.type||j.disabled!==!0)){for(f=[],g={},c=0;c-1:r.find(e,this,null,[j]).length),g[e]&&f.push(d);f.length&&h.push({elem:j,handlers:f})}return j=this,i\x20\t\r\n\f]*)[^>]*)\/>/gi,za=/\s*$/g;function Da(a,b){return r.nodeName(a,"table")&&r.nodeName(11!==b.nodeType?b:b.firstChild,"tr")?a.getElementsByTagName("tbody")[0]||a:a}function Ea(a){return 
a.type=(null!==a.getAttribute("type"))+"/"+a.type,a}function Fa(a){var b=Ba.exec(a.type);return b?a.type=b[1]:a.removeAttribute("type"),a}function Ga(a,b){var c,d,e,f,g,h,i,j;if(1===b.nodeType){if(V.hasData(a)&&(f=V.access(a),g=V.set(b,f),j=f.events)){delete g.handle,g.events={};for(e in j)for(c=0,d=j[e].length;c1&&"string"==typeof q&&!o.checkClone&&Aa.test(q))return a.each(function(e){var f=a.eq(e);s&&(b[0]=q.call(this,e,f.html())),Ia(f,b,c,d)});if(m&&(e=pa(b,a[0].ownerDocument,!1,a,d),f=e.firstChild,1===e.childNodes.length&&(e=f),f||d)){for(h=r.map(ma(e,"script"),Ea),i=h.length;l")},clone:function(a,b,c){var d,e,f,g,h=a.cloneNode(!0),i=r.contains(a.ownerDocument,a);if(!(o.noCloneChecked||1!==a.nodeType&&11!==a.nodeType||r.isXMLDoc(a)))for(g=ma(h),f=ma(a),d=0,e=f.length;d0&&na(g,!i&&ma(a,"script")),h},cleanData:function(a){for(var b,c,d,e=r.event.special,f=0;void 0!==(c=a[f]);f++)if(T(c)){if(b=c[V.expando]){if(b.events)for(d in b.events)e[d]?r.event.remove(c,d):r.removeEvent(c,d,b.handle);c[V.expando]=void 0}c[W.expando]&&(c[W.expando]=void 0)}}}),r.fn.extend({detach:function(a){return Ja(this,a,!0)},remove:function(a){return Ja(this,a)},text:function(a){return S(this,function(a){return void 0===a?r.text(this):this.empty().each(function(){1!==this.nodeType&&11!==this.nodeType&&9!==this.nodeType||(this.textContent=a)})},null,a,arguments.length)},append:function(){return Ia(this,arguments,function(a){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var b=Da(this,a);b.appendChild(a)}})},prepend:function(){return Ia(this,arguments,function(a){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var b=Da(this,a);b.insertBefore(a,b.firstChild)}})},before:function(){return Ia(this,arguments,function(a){this.parentNode&&this.parentNode.insertBefore(a,this)})},after:function(){return Ia(this,arguments,function(a){this.parentNode&&this.parentNode.insertBefore(a,this.nextSibling)})},empty:function(){for(var 
a,b=0;null!=(a=this[b]);b++)1===a.nodeType&&(r.cleanData(ma(a,!1)),a.textContent="");return this},clone:function(a,b){return a=null!=a&&a,b=null==b?a:b,this.map(function(){return r.clone(this,a,b)})},html:function(a){return S(this,function(a){var b=this[0]||{},c=0,d=this.length;if(void 0===a&&1===b.nodeType)return b.innerHTML;if("string"==typeof a&&!za.test(a)&&!la[(ja.exec(a)||["",""])[1].toLowerCase()]){a=r.htmlPrefilter(a);try{for(;c1)}});function Ya(a,b,c,d,e){return new Ya.prototype.init(a,b,c,d,e)}r.Tween=Ya,Ya.prototype={constructor:Ya,init:function(a,b,c,d,e,f){this.elem=a,this.prop=c,this.easing=e||r.easing._default,this.options=b,this.start=this.now=this.cur(),this.end=d,this.unit=f||(r.cssNumber[c]?"":"px")},cur:function(){var a=Ya.propHooks[this.prop];return a&&a.get?a.get(this):Ya.propHooks._default.get(this)},run:function(a){var b,c=Ya.propHooks[this.prop];return this.options.duration?this.pos=b=r.easing[this.easing](a,this.options.duration*a,0,1,this.options.duration):this.pos=b=a,this.now=(this.end-this.start)*b+this.start,this.options.step&&this.options.step.call(this.elem,this.now,this),c&&c.set?c.set(this):Ya.propHooks._default.set(this),this}},Ya.prototype.init.prototype=Ya.prototype,Ya.propHooks={_default:{get:function(a){var b;return 1!==a.elem.nodeType||null!=a.elem[a.prop]&&null==a.elem.style[a.prop]?a.elem[a.prop]:(b=r.css(a.elem,a.prop,""),b&&"auto"!==b?b:0)},set:function(a){r.fx.step[a.prop]?r.fx.step[a.prop](a):1!==a.elem.nodeType||null==a.elem.style[r.cssProps[a.prop]]&&!r.cssHooks[a.prop]?a.elem[a.prop]=a.now:r.style(a.elem,a.prop,a.now+a.unit)}}},Ya.propHooks.scrollTop=Ya.propHooks.scrollLeft={set:function(a){a.elem.nodeType&&a.elem.parentNode&&(a.elem[a.prop]=a.now)}},r.easing={linear:function(a){return a},swing:function(a){return.5-Math.cos(a*Math.PI)/2},_default:"swing"},r.fx=Ya.prototype.init,r.fx.step={};var Za,$a,_a=/^(?:toggle|show|hide)$/,ab=/queueHooks$/;function bb(){$a&&(a.requestAnimationFrame(bb),r.fx.tick())}function 
cb(){return a.setTimeout(function(){Za=void 0}),Za=r.now()}function db(a,b){var c,d=0,e={height:a};for(b=b?1:0;d<4;d+=2-b)c=ba[d],e["margin"+c]=e["padding"+c]=a;return b&&(e.opacity=e.width=a),e}function eb(a,b,c){for(var d,e=(hb.tweeners[b]||[]).concat(hb.tweeners["*"]),f=0,g=e.length;f1)},removeAttr:function(a){return this.each(function(){r.removeAttr(this,a)})}}),r.extend({attr:function(a,b,c){var d,e,f=a.nodeType;if(3!==f&&8!==f&&2!==f)return"undefined"==typeof a.getAttribute?r.prop(a,b,c):(1===f&&r.isXMLDoc(a)||(e=r.attrHooks[b.toLowerCase()]||(r.expr.match.bool.test(b)?ib:void 0)), +void 0!==c?null===c?void r.removeAttr(a,b):e&&"set"in e&&void 0!==(d=e.set(a,c,b))?d:(a.setAttribute(b,c+""),c):e&&"get"in e&&null!==(d=e.get(a,b))?d:(d=r.find.attr(a,b),null==d?void 0:d))},attrHooks:{type:{set:function(a,b){if(!o.radioValue&&"radio"===b&&r.nodeName(a,"input")){var c=a.value;return a.setAttribute("type",b),c&&(a.value=c),b}}}},removeAttr:function(a,b){var c,d=0,e=b&&b.match(K);if(e&&1===a.nodeType)while(c=e[d++])a.removeAttribute(c)}}),ib={set:function(a,b,c){return b===!1?r.removeAttr(a,c):a.setAttribute(c,c),c}},r.each(r.expr.match.bool.source.match(/\w+/g),function(a,b){var c=jb[b]||r.find.attr;jb[b]=function(a,b,d){var e,f,g=b.toLowerCase();return d||(f=jb[g],jb[g]=e,e=null!=c(a,b,d)?g:null,jb[g]=f),e}});var kb=/^(?:input|select|textarea|button)$/i,lb=/^(?:a|area)$/i;r.fn.extend({prop:function(a,b){return S(this,r.prop,a,b,arguments.length>1)},removeProp:function(a){return this.each(function(){delete this[r.propFix[a]||a]})}}),r.extend({prop:function(a,b,c){var d,e,f=a.nodeType;if(3!==f&&8!==f&&2!==f)return 1===f&&r.isXMLDoc(a)||(b=r.propFix[b]||b,e=r.propHooks[b]),void 0!==c?e&&"set"in e&&void 0!==(d=e.set(a,c,b))?d:a[b]=c:e&&"get"in e&&null!==(d=e.get(a,b))?d:a[b]},propHooks:{tabIndex:{get:function(a){var b=r.find.attr(a,"tabindex");return 
b?parseInt(b,10):kb.test(a.nodeName)||lb.test(a.nodeName)&&a.href?0:-1}}},propFix:{"for":"htmlFor","class":"className"}}),o.optSelected||(r.propHooks.selected={get:function(a){var b=a.parentNode;return b&&b.parentNode&&b.parentNode.selectedIndex,null},set:function(a){var b=a.parentNode;b&&(b.selectedIndex,b.parentNode&&b.parentNode.selectedIndex)}}),r.each(["tabIndex","readOnly","maxLength","cellSpacing","cellPadding","rowSpan","colSpan","useMap","frameBorder","contentEditable"],function(){r.propFix[this.toLowerCase()]=this});function mb(a){var b=a.match(K)||[];return b.join(" ")}function nb(a){return a.getAttribute&&a.getAttribute("class")||""}r.fn.extend({addClass:function(a){var b,c,d,e,f,g,h,i=0;if(r.isFunction(a))return this.each(function(b){r(this).addClass(a.call(this,b,nb(this)))});if("string"==typeof a&&a){b=a.match(K)||[];while(c=this[i++])if(e=nb(c),d=1===c.nodeType&&" "+mb(e)+" "){g=0;while(f=b[g++])d.indexOf(" "+f+" ")<0&&(d+=f+" ");h=mb(d),e!==h&&c.setAttribute("class",h)}}return this},removeClass:function(a){var b,c,d,e,f,g,h,i=0;if(r.isFunction(a))return this.each(function(b){r(this).removeClass(a.call(this,b,nb(this)))});if(!arguments.length)return this.attr("class","");if("string"==typeof a&&a){b=a.match(K)||[];while(c=this[i++])if(e=nb(c),d=1===c.nodeType&&" "+mb(e)+" "){g=0;while(f=b[g++])while(d.indexOf(" "+f+" ")>-1)d=d.replace(" "+f+" "," ");h=mb(d),e!==h&&c.setAttribute("class",h)}}return this},toggleClass:function(a,b){var c=typeof a;return"boolean"==typeof b&&"string"===c?b?this.addClass(a):this.removeClass(a):r.isFunction(a)?this.each(function(c){r(this).toggleClass(a.call(this,c,nb(this),b),b)}):this.each(function(){var b,d,e,f;if("string"===c){d=0,e=r(this),f=a.match(K)||[];while(b=f[d++])e.hasClass(b)?e.removeClass(b):e.addClass(b)}else void 0!==a&&"boolean"!==c||(b=nb(this),b&&V.set(this,"__className__",b),this.setAttribute&&this.setAttribute("class",b||a===!1?"":V.get(this,"__className__")||""))})},hasClass:function(a){var 
b,c,d=0;b=" "+a+" ";while(c=this[d++])if(1===c.nodeType&&(" "+mb(nb(c))+" ").indexOf(b)>-1)return!0;return!1}});var ob=/\r/g;r.fn.extend({val:function(a){var b,c,d,e=this[0];{if(arguments.length)return d=r.isFunction(a),this.each(function(c){var e;1===this.nodeType&&(e=d?a.call(this,c,r(this).val()):a,null==e?e="":"number"==typeof e?e+="":r.isArray(e)&&(e=r.map(e,function(a){return null==a?"":a+""})),b=r.valHooks[this.type]||r.valHooks[this.nodeName.toLowerCase()],b&&"set"in b&&void 0!==b.set(this,e,"value")||(this.value=e))});if(e)return b=r.valHooks[e.type]||r.valHooks[e.nodeName.toLowerCase()],b&&"get"in b&&void 0!==(c=b.get(e,"value"))?c:(c=e.value,"string"==typeof c?c.replace(ob,""):null==c?"":c)}}}),r.extend({valHooks:{option:{get:function(a){var b=r.find.attr(a,"value");return null!=b?b:mb(r.text(a))}},select:{get:function(a){var b,c,d,e=a.options,f=a.selectedIndex,g="select-one"===a.type,h=g?null:[],i=g?f+1:e.length;for(d=f<0?i:g?f:0;d-1)&&(c=!0);return c||(a.selectedIndex=-1),f}}}}),r.each(["radio","checkbox"],function(){r.valHooks[this]={set:function(a,b){if(r.isArray(b))return a.checked=r.inArray(r(a).val(),b)>-1}},o.checkOn||(r.valHooks[this].get=function(a){return null===a.getAttribute("value")?"on":a.value})});var pb=/^(?:focusinfocus|focusoutblur)$/;r.extend(r.event,{trigger:function(b,c,e,f){var g,h,i,j,k,m,n,o=[e||d],p=l.call(b,"type")?b.type:b,q=l.call(b,"namespace")?b.namespace.split("."):[];if(h=i=e=e||d,3!==e.nodeType&&8!==e.nodeType&&!pb.test(p+r.event.triggered)&&(p.indexOf(".")>-1&&(q=p.split("."),p=q.shift(),q.sort()),k=p.indexOf(":")<0&&"on"+p,b=b[r.expando]?b:new r.Event(p,"object"==typeof b&&b),b.isTrigger=f?2:3,b.namespace=q.join("."),b.rnamespace=b.namespace?new RegExp("(^|\\.)"+q.join("\\.(?:.*\\.|)")+"(\\.|$)"):null,b.result=void 
0,b.target||(b.target=e),c=null==c?[b]:r.makeArray(c,[b]),n=r.event.special[p]||{},f||!n.trigger||n.trigger.apply(e,c)!==!1)){if(!f&&!n.noBubble&&!r.isWindow(e)){for(j=n.delegateType||p,pb.test(j+p)||(h=h.parentNode);h;h=h.parentNode)o.push(h),i=h;i===(e.ownerDocument||d)&&o.push(i.defaultView||i.parentWindow||a)}g=0;while((h=o[g++])&&!b.isPropagationStopped())b.type=g>1?j:n.bindType||p,m=(V.get(h,"events")||{})[b.type]&&V.get(h,"handle"),m&&m.apply(h,c),m=k&&h[k],m&&m.apply&&T(h)&&(b.result=m.apply(h,c),b.result===!1&&b.preventDefault());return b.type=p,f||b.isDefaultPrevented()||n._default&&n._default.apply(o.pop(),c)!==!1||!T(e)||k&&r.isFunction(e[p])&&!r.isWindow(e)&&(i=e[k],i&&(e[k]=null),r.event.triggered=p,e[p](),r.event.triggered=void 0,i&&(e[k]=i)),b.result}},simulate:function(a,b,c){var d=r.extend(new r.Event,c,{type:a,isSimulated:!0});r.event.trigger(d,null,b)}}),r.fn.extend({trigger:function(a,b){return this.each(function(){r.event.trigger(a,b,this)})},triggerHandler:function(a,b){var c=this[0];if(c)return r.event.trigger(a,b,c,!0)}}),r.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(a,b){r.fn[b]=function(a,c){return arguments.length>0?this.on(b,null,a,c):this.trigger(b)}}),r.fn.extend({hover:function(a,b){return this.mouseenter(a).mouseleave(b||a)}}),o.focusin="onfocusin"in a,o.focusin||r.each({focus:"focusin",blur:"focusout"},function(a,b){var c=function(a){r.event.simulate(b,a.target,r.event.fix(a))};r.event.special[b]={setup:function(){var d=this.ownerDocument||this,e=V.access(d,b);e||d.addEventListener(a,c,!0),V.access(d,b,(e||0)+1)},teardown:function(){var d=this.ownerDocument||this,e=V.access(d,b)-1;e?V.access(d,b,e):(d.removeEventListener(a,c,!0),V.remove(d,b))}}});var qb=a.location,rb=r.now(),sb=/\?/;r.parseXML=function(b){var c;if(!b||"string"!=typeof b)return null;try{c=(new 
a.DOMParser).parseFromString(b,"text/xml")}catch(d){c=void 0}return c&&!c.getElementsByTagName("parsererror").length||r.error("Invalid XML: "+b),c};var tb=/\[\]$/,ub=/\r?\n/g,vb=/^(?:submit|button|image|reset|file)$/i,wb=/^(?:input|select|textarea|keygen)/i;function xb(a,b,c,d){var e;if(r.isArray(b))r.each(b,function(b,e){c||tb.test(a)?d(a,e):xb(a+"["+("object"==typeof e&&null!=e?b:"")+"]",e,c,d)});else if(c||"object"!==r.type(b))d(a,b);else for(e in b)xb(a+"["+e+"]",b[e],c,d)}r.param=function(a,b){var c,d=[],e=function(a,b){var c=r.isFunction(b)?b():b;d[d.length]=encodeURIComponent(a)+"="+encodeURIComponent(null==c?"":c)};if(r.isArray(a)||a.jquery&&!r.isPlainObject(a))r.each(a,function(){e(this.name,this.value)});else for(c in a)xb(c,a[c],b,e);return d.join("&")},r.fn.extend({serialize:function(){return r.param(this.serializeArray())},serializeArray:function(){return this.map(function(){var a=r.prop(this,"elements");return a?r.makeArray(a):this}).filter(function(){var a=this.type;return this.name&&!r(this).is(":disabled")&&wb.test(this.nodeName)&&!vb.test(a)&&(this.checked||!ia.test(a))}).map(function(a,b){var c=r(this).val();return null==c?null:r.isArray(c)?r.map(c,function(a){return{name:b.name,value:a.replace(ub,"\r\n")}}):{name:b.name,value:c.replace(ub,"\r\n")}}).get()}});var yb=/%20/g,zb=/#.*$/,Ab=/([?&])_=[^&]*/,Bb=/^(.*?):[ \t]*([^\r\n]*)$/gm,Cb=/^(?:about|app|app-storage|.+-extension|file|res|widget):$/,Db=/^(?:GET|HEAD)$/,Eb=/^\/\//,Fb={},Gb={},Hb="*/".concat("*"),Ib=d.createElement("a");Ib.href=qb.href;function Jb(a){return function(b,c){"string"!=typeof b&&(c=b,b="*");var d,e=0,f=b.toLowerCase().match(K)||[];if(r.isFunction(c))while(d=f[e++])"+"===d[0]?(d=d.slice(1)||"*",(a[d]=a[d]||[]).unshift(c)):(a[d]=a[d]||[]).push(c)}}function Kb(a,b,c,d){var e={},f=a===Gb;function g(h){var i;return e[h]=!0,r.each(a[h]||[],function(a,h){var j=h(b,c,d);return"string"!=typeof j||f||e[j]?f?!(i=j):void 0:(b.dataTypes.unshift(j),g(j),!1)}),i}return 
g(b.dataTypes[0])||!e["*"]&&g("*")}function Lb(a,b){var c,d,e=r.ajaxSettings.flatOptions||{};for(c in b)void 0!==b[c]&&((e[c]?a:d||(d={}))[c]=b[c]);return d&&r.extend(!0,a,d),a}function Mb(a,b,c){var d,e,f,g,h=a.contents,i=a.dataTypes;while("*"===i[0])i.shift(),void 0===d&&(d=a.mimeType||b.getResponseHeader("Content-Type"));if(d)for(e in h)if(h[e]&&h[e].test(d)){i.unshift(e);break}if(i[0]in c)f=i[0];else{for(e in c){if(!i[0]||a.converters[e+" "+i[0]]){f=e;break}g||(g=e)}f=f||g}if(f)return f!==i[0]&&i.unshift(f),c[f]}function Nb(a,b,c,d){var e,f,g,h,i,j={},k=a.dataTypes.slice();if(k[1])for(g in a.converters)j[g.toLowerCase()]=a.converters[g];f=k.shift();while(f)if(a.responseFields[f]&&(c[a.responseFields[f]]=b),!i&&d&&a.dataFilter&&(b=a.dataFilter(b,a.dataType)),i=f,f=k.shift())if("*"===f)f=i;else if("*"!==i&&i!==f){if(g=j[i+" "+f]||j["* "+f],!g)for(e in j)if(h=e.split(" "),h[1]===f&&(g=j[i+" "+h[0]]||j["* "+h[0]])){g===!0?g=j[e]:j[e]!==!0&&(f=h[0],k.unshift(h[1]));break}if(g!==!0)if(g&&a["throws"])b=g(b);else try{b=g(b)}catch(l){return{state:"parsererror",error:g?l:"No conversion from "+i+" to "+f}}}return{state:"success",data:b}}r.extend({active:0,lastModified:{},etag:{},ajaxSettings:{url:qb.href,type:"GET",isLocal:Cb.test(qb.protocol),global:!0,processData:!0,async:!0,contentType:"application/x-www-form-urlencoded; charset=UTF-8",accepts:{"*":Hb,text:"text/plain",html:"text/html",xml:"application/xml, text/xml",json:"application/json, text/javascript"},contents:{xml:/\bxml\b/,html:/\bhtml/,json:/\bjson\b/},responseFields:{xml:"responseXML",text:"responseText",json:"responseJSON"},converters:{"* text":String,"text html":!0,"text json":JSON.parse,"text xml":r.parseXML},flatOptions:{url:!0,context:!0}},ajaxSetup:function(a,b){return b?Lb(Lb(a,r.ajaxSettings),b):Lb(r.ajaxSettings,a)},ajaxPrefilter:Jb(Fb),ajaxTransport:Jb(Gb),ajax:function(b,c){"object"==typeof b&&(c=b,b=void 0),c=c||{};var 
e,f,g,h,i,j,k,l,m,n,o=r.ajaxSetup({},c),p=o.context||o,q=o.context&&(p.nodeType||p.jquery)?r(p):r.event,s=r.Deferred(),t=r.Callbacks("once memory"),u=o.statusCode||{},v={},w={},x="canceled",y={readyState:0,getResponseHeader:function(a){var b;if(k){if(!h){h={};while(b=Bb.exec(g))h[b[1].toLowerCase()]=b[2]}b=h[a.toLowerCase()]}return null==b?null:b},getAllResponseHeaders:function(){return k?g:null},setRequestHeader:function(a,b){return null==k&&(a=w[a.toLowerCase()]=w[a.toLowerCase()]||a,v[a]=b),this},overrideMimeType:function(a){return null==k&&(o.mimeType=a),this},statusCode:function(a){var b;if(a)if(k)y.always(a[y.status]);else for(b in a)u[b]=[u[b],a[b]];return this},abort:function(a){var b=a||x;return e&&e.abort(b),A(0,b),this}};if(s.promise(y),o.url=((b||o.url||qb.href)+"").replace(Eb,qb.protocol+"//"),o.type=c.method||c.type||o.method||o.type,o.dataTypes=(o.dataType||"*").toLowerCase().match(K)||[""],null==o.crossDomain){j=d.createElement("a");try{j.href=o.url,j.href=j.href,o.crossDomain=Ib.protocol+"//"+Ib.host!=j.protocol+"//"+j.host}catch(z){o.crossDomain=!0}}if(o.data&&o.processData&&"string"!=typeof o.data&&(o.data=r.param(o.data,o.traditional)),Kb(Fb,o,c,y),k)return y;l=r.event&&o.global,l&&0===r.active++&&r.event.trigger("ajaxStart"),o.type=o.type.toUpperCase(),o.hasContent=!Db.test(o.type),f=o.url.replace(zb,""),o.hasContent?o.data&&o.processData&&0===(o.contentType||"").indexOf("application/x-www-form-urlencoded")&&(o.data=o.data.replace(yb,"+")):(n=o.url.slice(f.length),o.data&&(f+=(sb.test(f)?"&":"?")+o.data,delete o.data),o.cache===!1&&(f=f.replace(Ab,"$1"),n=(sb.test(f)?"&":"?")+"_="+rb++ 
+n),o.url=f+n),o.ifModified&&(r.lastModified[f]&&y.setRequestHeader("If-Modified-Since",r.lastModified[f]),r.etag[f]&&y.setRequestHeader("If-None-Match",r.etag[f])),(o.data&&o.hasContent&&o.contentType!==!1||c.contentType)&&y.setRequestHeader("Content-Type",o.contentType),y.setRequestHeader("Accept",o.dataTypes[0]&&o.accepts[o.dataTypes[0]]?o.accepts[o.dataTypes[0]]+("*"!==o.dataTypes[0]?", "+Hb+"; q=0.01":""):o.accepts["*"]);for(m in o.headers)y.setRequestHeader(m,o.headers[m]);if(o.beforeSend&&(o.beforeSend.call(p,y,o)===!1||k))return y.abort();if(x="abort",t.add(o.complete),y.done(o.success),y.fail(o.error),e=Kb(Gb,o,c,y)){if(y.readyState=1,l&&q.trigger("ajaxSend",[y,o]),k)return y;o.async&&o.timeout>0&&(i=a.setTimeout(function(){y.abort("timeout")},o.timeout));try{k=!1,e.send(v,A)}catch(z){if(k)throw z;A(-1,z)}}else A(-1,"No Transport");function A(b,c,d,h){var j,m,n,v,w,x=c;k||(k=!0,i&&a.clearTimeout(i),e=void 0,g=h||"",y.readyState=b>0?4:0,j=b>=200&&b<300||304===b,d&&(v=Mb(o,y,d)),v=Nb(o,v,y,j),j?(o.ifModified&&(w=y.getResponseHeader("Last-Modified"),w&&(r.lastModified[f]=w),w=y.getResponseHeader("etag"),w&&(r.etag[f]=w)),204===b||"HEAD"===o.type?x="nocontent":304===b?x="notmodified":(x=v.state,m=v.data,n=v.error,j=!n)):(n=x,!b&&x||(x="error",b<0&&(b=0))),y.status=b,y.statusText=(c||x)+"",j?s.resolveWith(p,[m,x,y]):s.rejectWith(p,[y,x,n]),y.statusCode(u),u=void 0,l&&q.trigger(j?"ajaxSuccess":"ajaxError",[y,o,j?m:n]),t.fireWith(p,[y,x]),l&&(q.trigger("ajaxComplete",[y,o]),--r.active||r.event.trigger("ajaxStop")))}return y},getJSON:function(a,b,c){return r.get(a,b,c,"json")},getScript:function(a,b){return r.get(a,void 0,b,"script")}}),r.each(["get","post"],function(a,b){r[b]=function(a,c,d,e){return r.isFunction(c)&&(e=e||d,d=c,c=void 0),r.ajax(r.extend({url:a,type:b,dataType:e,data:c,success:d},r.isPlainObject(a)&&a))}}),r._evalUrl=function(a){return 
r.ajax({url:a,type:"GET",dataType:"script",cache:!0,async:!1,global:!1,"throws":!0})},r.fn.extend({wrapAll:function(a){var b;return this[0]&&(r.isFunction(a)&&(a=a.call(this[0])),b=r(a,this[0].ownerDocument).eq(0).clone(!0),this[0].parentNode&&b.insertBefore(this[0]),b.map(function(){var a=this;while(a.firstElementChild)a=a.firstElementChild;return a}).append(this)),this},wrapInner:function(a){return r.isFunction(a)?this.each(function(b){r(this).wrapInner(a.call(this,b))}):this.each(function(){var b=r(this),c=b.contents();c.length?c.wrapAll(a):b.append(a)})},wrap:function(a){var b=r.isFunction(a);return this.each(function(c){r(this).wrapAll(b?a.call(this,c):a)})},unwrap:function(a){return this.parent(a).not("body").each(function(){r(this).replaceWith(this.childNodes)}),this}}),r.expr.pseudos.hidden=function(a){return!r.expr.pseudos.visible(a)},r.expr.pseudos.visible=function(a){return!!(a.offsetWidth||a.offsetHeight||a.getClientRects().length)},r.ajaxSettings.xhr=function(){try{return new a.XMLHttpRequest}catch(b){}};var Ob={0:200,1223:204},Pb=r.ajaxSettings.xhr();o.cors=!!Pb&&"withCredentials"in Pb,o.ajax=Pb=!!Pb,r.ajaxTransport(function(b){var c,d;if(o.cors||Pb&&!b.crossDomain)return{send:function(e,f){var g,h=b.xhr();if(h.open(b.type,b.url,b.async,b.username,b.password),b.xhrFields)for(g in b.xhrFields)h[g]=b.xhrFields[g];b.mimeType&&h.overrideMimeType&&h.overrideMimeType(b.mimeType),b.crossDomain||e["X-Requested-With"]||(e["X-Requested-With"]="XMLHttpRequest");for(g in e)h.setRequestHeader(g,e[g]);c=function(a){return function(){c&&(c=d=h.onload=h.onerror=h.onabort=h.onreadystatechange=null,"abort"===a?h.abort():"error"===a?"number"!=typeof h.status?f(0,"error"):f(h.status,h.statusText):f(Ob[h.status]||h.status,h.statusText,"text"!==(h.responseType||"text")||"string"!=typeof h.responseText?{binary:h.response}:{text:h.responseText},h.getAllResponseHeaders()))}},h.onload=c(),d=h.onerror=c("error"),void 
0!==h.onabort?h.onabort=d:h.onreadystatechange=function(){4===h.readyState&&a.setTimeout(function(){c&&d()})},c=c("abort");try{h.send(b.hasContent&&b.data||null)}catch(i){if(c)throw i}},abort:function(){c&&c()}}}),r.ajaxPrefilter(function(a){a.crossDomain&&(a.contents.script=!1)}),r.ajaxSetup({accepts:{script:"text/javascript, application/javascript, application/ecmascript, application/x-ecmascript"},contents:{script:/\b(?:java|ecma)script\b/},converters:{"text script":function(a){return r.globalEval(a),a}}}),r.ajaxPrefilter("script",function(a){void 0===a.cache&&(a.cache=!1),a.crossDomain&&(a.type="GET")}),r.ajaxTransport("script",function(a){if(a.crossDomain){var b,c;return{send:function(e,f){b=r("