Clean up, refactor, and migrate to .NET 6

pull/1/head
unknown 3 years ago
parent 29958c4a48
commit 6dc71702f5

@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp3.1</TargetFramework>
<TargetFramework>net6.0</TargetFramework>
<Platforms>AnyCPU;x64;x86</Platforms>
</PropertyGroup>

@ -1,8 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp3.1</TargetFramework>
<TargetFramework>net6.0</TargetFramework>
<Platforms>AnyCPU;x64;x86</Platforms>
</PropertyGroup>

@ -1,6 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.8"/>
</startup>
</configuration>

@ -0,0 +1,10 @@
using System.Windows;
// Declares where resource dictionaries are located for this assembly.
//   themeDictionaryLocation   - where theme-specific resource dictionaries are located
//                               (used if a resource is not found in the page
//                               or application resource dictionaries).
//   genericDictionaryLocation - where the generic resource dictionary is located
//                               (used if a resource is not found in the page, app,
//                               or any theme-specific resource dictionaries).
[assembly: ThemeInfo(
    themeDictionaryLocation: ResourceDictionaryLocation.None,
    genericDictionaryLocation: ResourceDictionaryLocation.SourceAssembly)]

@ -1,147 +1,27 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{F8B727E1-340D-4096-A784-E570AE13FABC}</ProjectGuid>
<OutputType>WinExe</OutputType>
<RootNamespace>FileTransferClient</RootNamespace>
<AssemblyName>FileTransferClient</AssemblyName>
<TargetFrameworkVersion>v4.8</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<ProjectTypeGuids>{60dc8134-eba5-43b8-bcc9-bb4bc16c2548};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
<WarningLevel>4</WarningLevel>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<Deterministic>true</Deterministic>
<TargetFrameworkProfile />
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x64\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x64</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
<OutputPath>bin\x64\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x64</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x86'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x86\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x86</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x86'">
<OutputPath>bin\x86\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x86</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>true</Prefer32Bit>
<TargetFramework>net6.0-windows</TargetFramework>
<Nullable>enable</Nullable>
<UseWPF>true</UseWPF>
<Platforms>AnyCPU;x64</Platforms>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="System.Net.Http" />
<Reference Include="System.Xaml">
<RequiredTargetFramework>4.0</RequiredTargetFramework>
</Reference>
<Reference Include="WindowsBase" />
<Reference Include="PresentationCore" />
<Reference Include="PresentationFramework" />
<ProjectReference Include="..\..\ZeroLevel\ZeroLevel.csproj" />
</ItemGroup>
<ItemGroup>
<ApplicationDefinition Include="App.xaml">
<Generator>MSBuild:Compile</Generator>
<SubType>Designer</SubType>
</ApplicationDefinition>
<Page Include="MainWindow.xaml">
<Generator>MSBuild:Compile</Generator>
<SubType>Designer</SubType>
</Page>
<Compile Include="App.xaml.cs">
<DependentUpon>App.xaml</DependentUpon>
<SubType>Code</SubType>
</Compile>
<Compile Include="MainWindow.xaml.cs">
<DependentUpon>MainWindow.xaml</DependentUpon>
<Compile Update="MainWindow.xaml.cs">
<SubType>Code</SubType>
</Compile>
</ItemGroup>
<ItemGroup>
<Compile Include="Properties\AssemblyInfo.cs">
<SubType>Code</SubType>
</Compile>
<Compile Include="Properties\Resources.Designer.cs">
<AutoGen>True</AutoGen>
<DesignTime>True</DesignTime>
<DependentUpon>Resources.resx</DependentUpon>
</Compile>
<Compile Include="Properties\Settings.Designer.cs">
<AutoGen>True</AutoGen>
<DependentUpon>Settings.settings</DependentUpon>
<DesignTimeSharedInput>True</DesignTimeSharedInput>
</Compile>
<EmbeddedResource Include="Properties\Resources.resx">
<Generator>ResXFileCodeGenerator</Generator>
<LastGenOutput>Resources.Designer.cs</LastGenOutput>
</EmbeddedResource>
<None Include="Properties\Settings.settings">
<Generator>SettingsSingleFileGenerator</Generator>
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
</None>
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\ZeroLevel\ZeroLevel.csproj">
<Project>{06c9e60e-d449-41a7-9bf0-a829aaf5d214}</Project>
<Name>ZeroLevel</Name>
</ProjectReference>
<Page Update="MainWindow.xaml">
<SubType>Designer</SubType>
</Page>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
</Project>

@ -1,55 +0,0 @@
using System.Reflection;
using System.Resources;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Windows;
// General information about the assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with the assembly.
[assembly: AssemblyTitle("FileTransferClient")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("FileTransferClient")]
[assembly: AssemblyCopyright("Copyright © 2019")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
// To begin building localizable applications, set
// <UICulture>CultureYouAreCodingWith</UICulture> in your .csproj file
// inside a <PropertyGroup>. For example, if you are using US English
// in your source files, set the <UICulture> to en-US. Then uncomment
// the NeutralResourcesLanguage attribute below. Update the "en-US" in
// the line below to match the UICulture setting in the project file.
//[assembly: NeutralResourcesLanguage("en-US", UltimateResourceFallbackLocation.Satellite)]
[assembly: ThemeInfo(
ResourceDictionaryLocation.None, // where theme-specific resource dictionaries are located
// (used if a resource is not found in the page
// or application resource dictionaries)
ResourceDictionaryLocation.SourceAssembly // where the generic resource dictionary is located
// (used if a resource is not found in the page,
// app, or any theme-specific resource dictionaries)
)]
// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values, or you can default the Build and Revision numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]

@ -1,63 +0,0 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
namespace FileTransferClient.Properties
{
    using System;

    /// <summary>
    ///   Strongly-typed resource class used to look up localized strings and
    ///   other resources for this assembly.
    /// </summary>
    // Generated by the StronglyTypedResourceBuilder class (ResGen / Visual Studio).
    // To add or remove a member, edit the .ResX file and then rerun ResGen with
    // the /str option, or rebuild the VS project.
    [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "16.0.0.0")]
    [global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
    [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
    internal class Resources
    {
        // Lazily created by the ResourceManager property; null until first access.
        private static global::System.Resources.ResourceManager resourceMan;

        // Culture override exposed through the Culture property; null unless set.
        private static global::System.Globalization.CultureInfo resourceCulture;

        [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
        internal Resources()
        {
        }

        /// <summary>
        ///   Gets the cached ResourceManager for this class, creating it on the
        ///   first access. No locking is performed around the creation.
        /// </summary>
        [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
        internal static global::System.Resources.ResourceManager ResourceManager
        {
            get
            {
                if (resourceMan != null)
                {
                    return resourceMan;
                }

                resourceMan = new global::System.Resources.ResourceManager(
                    "FileTransferClient.Properties.Resources",
                    typeof(Resources).Assembly);
                return resourceMan;
            }
        }

        /// <summary>
        ///   Gets or sets the culture stored by this class for resource lookups
        ///   made through the strongly-typed resource class.
        /// </summary>
        [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
        internal static global::System.Globalization.CultureInfo Culture
        {
            get => resourceCulture;
            set => resourceCulture = value;
        }
    }
}

@ -1,117 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goal of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that supports
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>

@ -1,26 +0,0 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
namespace FileTransferClient.Properties
{
    /// <summary>
    ///   Application settings class for FileTransferClient. It declares no
    ///   settings of its own here; it only exposes a shared default instance.
    /// </summary>
    [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
    [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "16.4.0.0")]
    internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase
    {
        // Single shared instance, obtained by passing a new Settings object
        // through ApplicationSettingsBase.Synchronized.
        private static Settings defaultInstance =
            (Settings)global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings());

        /// <summary>
        ///   Gets the shared Settings instance.
        /// </summary>
        public static Settings Default => defaultInstance;
    }
}

@ -1,7 +0,0 @@
<?xml version='1.0' encoding='utf-8'?>
<SettingsFile xmlns="uri:settings" CurrentProfile="(Default)">
<Profiles>
<Profile Name="(Default)" />
</Profiles>
<Settings />
</SettingsFile>

@ -1,6 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.8"/>
</startup>
</configuration>

@ -0,0 +1,10 @@
using System.Windows;
// Declares where resource dictionaries are located for this assembly.
//   themeDictionaryLocation   - where theme-specific resource dictionaries are located
//                               (used if a resource is not found in the page
//                               or application resource dictionaries).
//   genericDictionaryLocation - where the generic resource dictionary is located
//                               (used if a resource is not found in the page, app,
//                               or any theme-specific resource dictionaries).
[assembly: ThemeInfo(
    themeDictionaryLocation: ResourceDictionaryLocation.None,
    genericDictionaryLocation: ResourceDictionaryLocation.SourceAssembly)]

@ -1,147 +1,27 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{9BF859EE-EF90-4B5B-8576-E26770F2F792}</ProjectGuid>
<OutputType>WinExe</OutputType>
<RootNamespace>FileTransferServer</RootNamespace>
<AssemblyName>FileTransferServer</AssemblyName>
<TargetFrameworkVersion>v4.8</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<ProjectTypeGuids>{60dc8134-eba5-43b8-bcc9-bb4bc16c2548};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
<WarningLevel>4</WarningLevel>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<Deterministic>true</Deterministic>
<TargetFrameworkProfile />
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x64\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x64</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
<OutputPath>bin\x64\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x64</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x86'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x86\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x86</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x86'">
<OutputPath>bin\x86\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x86</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>true</Prefer32Bit>
<TargetFramework>net6.0-windows</TargetFramework>
<Nullable>enable</Nullable>
<UseWPF>true</UseWPF>
<Platforms>AnyCPU;x64</Platforms>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="System.Net.Http" />
<Reference Include="System.Xaml">
<RequiredTargetFramework>4.0</RequiredTargetFramework>
</Reference>
<Reference Include="WindowsBase" />
<Reference Include="PresentationCore" />
<Reference Include="PresentationFramework" />
<ProjectReference Include="..\..\ZeroLevel\ZeroLevel.csproj" />
</ItemGroup>
<ItemGroup>
<ApplicationDefinition Include="App.xaml">
<Generator>MSBuild:Compile</Generator>
<SubType>Designer</SubType>
</ApplicationDefinition>
<Page Include="MainWindow.xaml">
<Generator>MSBuild:Compile</Generator>
<SubType>Designer</SubType>
</Page>
<Compile Include="App.xaml.cs">
<DependentUpon>App.xaml</DependentUpon>
<SubType>Code</SubType>
</Compile>
<Compile Include="MainWindow.xaml.cs">
<DependentUpon>MainWindow.xaml</DependentUpon>
<Compile Update="MainWindow.xaml.cs">
<SubType>Code</SubType>
</Compile>
</ItemGroup>
<ItemGroup>
<Compile Include="Properties\AssemblyInfo.cs">
<SubType>Code</SubType>
</Compile>
<Compile Include="Properties\Resources.Designer.cs">
<AutoGen>True</AutoGen>
<DesignTime>True</DesignTime>
<DependentUpon>Resources.resx</DependentUpon>
</Compile>
<Compile Include="Properties\Settings.Designer.cs">
<AutoGen>True</AutoGen>
<DependentUpon>Settings.settings</DependentUpon>
<DesignTimeSharedInput>True</DesignTimeSharedInput>
</Compile>
<EmbeddedResource Include="Properties\Resources.resx">
<Generator>ResXFileCodeGenerator</Generator>
<LastGenOutput>Resources.Designer.cs</LastGenOutput>
</EmbeddedResource>
<None Include="Properties\Settings.settings">
<Generator>SettingsSingleFileGenerator</Generator>
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
</None>
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\ZeroLevel\ZeroLevel.csproj">
<Project>{06c9e60e-d449-41a7-9bf0-a829aaf5d214}</Project>
<Name>ZeroLevel</Name>
</ProjectReference>
<Page Update="MainWindow.xaml">
<SubType>Designer</SubType>
</Page>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
</Project>

@ -1,55 +0,0 @@
using System.Reflection;
using System.Resources;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Windows;
// General information about the assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with the assembly.
[assembly: AssemblyTitle("FileTransferServer")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("FileTransferServer")]
[assembly: AssemblyCopyright("Copyright © 2019")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
// To begin building localizable applications, set
// <UICulture>CultureYouAreCodingWith</UICulture> in your .csproj file
// inside a <PropertyGroup>. For example, if you are using US English
// in your source files, set the <UICulture> to en-US. Then uncomment
// the NeutralResourcesLanguage attribute below. Update the "en-US" in
// the line below to match the UICulture setting in the project file.
//[assembly: NeutralResourcesLanguage("en-US", UltimateResourceFallbackLocation.Satellite)]
[assembly: ThemeInfo(
ResourceDictionaryLocation.None, // where theme-specific resource dictionaries are located
// (used if a resource is not found in the page
// or application resource dictionaries)
ResourceDictionaryLocation.SourceAssembly // where the generic resource dictionary is located
// (used if a resource is not found in the page,
// app, or any theme-specific resource dictionaries)
)]
// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values, or you can default the Build and Revision numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]

@ -1,63 +0,0 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
namespace FileTransferServer.Properties
{
    using System;

    /// <summary>
    ///   Strongly-typed resource class used to look up localized strings and
    ///   other resources for this assembly.
    /// </summary>
    // Generated by the StronglyTypedResourceBuilder class (ResGen / Visual Studio).
    // To add or remove a member, edit the .ResX file and then rerun ResGen with
    // the /str option, or rebuild the VS project.
    [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "16.0.0.0")]
    [global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
    [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
    internal class Resources
    {
        // Lazily created by the ResourceManager property; null until first access.
        private static global::System.Resources.ResourceManager resourceMan;

        // Culture override exposed through the Culture property; null unless set.
        private static global::System.Globalization.CultureInfo resourceCulture;

        [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
        internal Resources()
        {
        }

        /// <summary>
        ///   Gets the cached ResourceManager for this class, creating it on the
        ///   first access. No locking is performed around the creation.
        /// </summary>
        [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
        internal static global::System.Resources.ResourceManager ResourceManager
        {
            get
            {
                if (resourceMan != null)
                {
                    return resourceMan;
                }

                resourceMan = new global::System.Resources.ResourceManager(
                    "FileTransferServer.Properties.Resources",
                    typeof(Resources).Assembly);
                return resourceMan;
            }
        }

        /// <summary>
        ///   Gets or sets the culture stored by this class for resource lookups
        ///   made through the strongly-typed resource class.
        /// </summary>
        [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
        internal static global::System.Globalization.CultureInfo Culture
        {
            get => resourceCulture;
            set => resourceCulture = value;
        }
    }
}

@ -1,117 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goal of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that supports
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>

@ -1,26 +0,0 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
namespace FileTransferServer.Properties {
    /// <summary>
    /// Designer-generated application settings holder. Exposes one shared instance
    /// through <see cref="Default"/>; it declares no individual settings.
    /// </summary>
    [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
    [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "16.4.0.0")]
    internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase {
        // Shared instance, wrapped by ApplicationSettingsBase.Synchronized at type initialization.
        private static Settings defaultInstance =
            (Settings)global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings());

        /// <summary>Gets the shared settings instance.</summary>
        public static Settings Default => defaultInstance;
    }
}

@ -1,7 +0,0 @@
<?xml version='1.0' encoding='utf-8'?>
<SettingsFile xmlns="uri:settings" CurrentProfile="(Default)">
<Profiles>
<Profile Name="(Default)" />
</Profiles>
<Settings />
</SettingsFile>

@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp3.0</TargetFramework>
<TargetFramework>net6.0</TargetFramework>
<Platforms>AnyCPU;x64;x86</Platforms>
</PropertyGroup>

@ -2,7 +2,8 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net5.0</TargetFramework>
<TargetFramework>net6.0</TargetFramework>
<Platforms>AnyCPU;x64</Platforms>
</PropertyGroup>
<ItemGroup>

@ -1,17 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.8"/>
</startup>
<appSettings>
<add key="ServiceName" value="Test consumer"/>
<add key="ServiceKey" value="test.consumer"/>
<add key="ServiceType" value="Destination"/>
<add key="ServiceGroup" value="Test"/>
<add key="Version" value="1.0.0.1"/>
<add key="discovery" value="127.0.0.1:5012"/>
</appSettings>
</configuration>

@ -1,103 +1,15 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{931DEA89-42D1-4C06-9CB8-A3A0412093D6}</ProjectGuid>
<OutputType>Exe</OutputType>
<RootNamespace>Consumer</RootNamespace>
<AssemblyName>Consumer</AssemblyName>
<TargetFrameworkVersion>v4.8</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<Deterministic>true</Deterministic>
<TargetFrameworkProfile />
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<Platforms>AnyCPU;x64</Platforms>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x64\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x64</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
<OutputPath>bin\x64\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x64</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x86'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x86\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x86</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x86'">
<OutputPath>bin\x86\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x86</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Net.Http" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="ConsumerService.cs" />
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\ZeroLevel\ZeroLevel.csproj">
<Project>{06c9e60e-d449-41a7-9bf0-a829aaf5d214}</Project>
<Name>ZeroLevel</Name>
</ProjectReference>
<ProjectReference Include="..\..\ZeroLevel\ZeroLevel.csproj" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
</Project>

@ -1,36 +0,0 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("Consumer")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("Consumer")]
[assembly: AssemblyCopyright("Copyright © 2019")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("931dea89-42d1-4c06-9cb8-a3a0412093d6")]
// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]

@ -1,17 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.8"/>
</startup>
<appSettings>
<add key="ServiceName" value="Test processor"/>
<add key="ServiceKey" value="test.processor"/>
<add key="ServiceType" value="Core"/>
<add key="ServiceGroup" value="Test"/>
<add key="Version" value="1.0.0.1"/>
<add key="discovery" value="127.0.0.1:5012"/>
</appSettings>
</configuration>

@ -1,103 +1,15 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{806D0160-A4BF-4881-AF33-308F4FEF8E15}</ProjectGuid>
<OutputType>Exe</OutputType>
<RootNamespace>Processor</RootNamespace>
<AssemblyName>Processor</AssemblyName>
<TargetFrameworkVersion>v4.8</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<Deterministic>true</Deterministic>
<TargetFrameworkProfile />
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<Platforms>AnyCPU;x64</Platforms>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x64\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x64</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
<OutputPath>bin\x64\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x64</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x86'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x86\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x86</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x86'">
<OutputPath>bin\x86\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x86</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Net.Http" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="ProcessorService.cs" />
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\ZeroLevel\ZeroLevel.csproj">
<Project>{06c9e60e-d449-41a7-9bf0-a829aaf5d214}</Project>
<Name>ZeroLevel</Name>
</ProjectReference>
<ProjectReference Include="..\..\ZeroLevel\ZeroLevel.csproj" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
</Project>

@ -1,36 +0,0 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("Processor")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("Processor")]
[assembly: AssemblyCopyright("Copyright © 2019")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("806d0160-a4bf-4881-af33-308f4fef8e15")]
// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]

@ -1,17 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.8"/>
</startup>
<appSettings>
<add key="ServiceName" value="Test source"/>
<add key="ServiceKey" value="test.source"/>
<add key="ServiceType" value="Sources"/>
<add key="ServiceGroup" value="Test"/>
<add key="Version" value="1.0.0.1"/>
<add key="discovery" value="127.0.0.1:5012"/>
</appSettings>
</configuration>

@ -1,36 +0,0 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("Source")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("Source")]
[assembly: AssemblyCopyright("Copyright © 2019")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("a1d60994-5744-47d1-b684-c1c0b782998b")]
// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]

@ -1,103 +1,15 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{A1D60994-5744-47D1-B684-C1C0B782998B}</ProjectGuid>
<OutputType>Exe</OutputType>
<RootNamespace>Source</RootNamespace>
<AssemblyName>Source</AssemblyName>
<TargetFrameworkVersion>v4.8</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<Deterministic>true</Deterministic>
<TargetFrameworkProfile />
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<Platforms>AnyCPU;x64</Platforms>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x64\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x64</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
<OutputPath>bin\x64\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x64</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x86'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x86\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x86</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x86'">
<OutputPath>bin\x86\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x86</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Net.Http" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="SourceService.cs" />
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\ZeroLevel\ZeroLevel.csproj">
<Project>{06c9e60e-d449-41a7-9bf0-a829aaf5d214}</Project>
<Name>ZeroLevel</Name>
</ProjectReference>
<ProjectReference Include="..\..\ZeroLevel\ZeroLevel.csproj" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
</Project>

@ -1,17 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.8"/>
</startup>
<appSettings>
<add key="ServiceName" value="Watcher"/>
<add key="ServiceKey" value="test.watcher"/>
<add key="ServiceType" value="System"/>
<add key="ServiceGroup" value="Test"/>
<add key="Version" value="1.0.0.1"/>
<add key="discovery" value="127.0.0.1:5012"/>
</appSettings>
</configuration>

@ -1,36 +0,0 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("Watcher")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("Watcher")]
[assembly: AssemblyCopyright("Copyright © 2019")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("6e04f32a-fb90-41d2-9059-f37311f813b3")]
// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]

@ -1,103 +1,15 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{6E04F32A-FB90-41D2-9059-F37311F813B3}</ProjectGuid>
<OutputType>Exe</OutputType>
<RootNamespace>Watcher</RootNamespace>
<AssemblyName>Watcher</AssemblyName>
<TargetFrameworkVersion>v4.8</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<Deterministic>true</Deterministic>
<TargetFrameworkProfile />
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<Platforms>AnyCPU;x64</Platforms>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x64\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x64</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
<OutputPath>bin\x64\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x64</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x86'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x86\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x86</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x86'">
<OutputPath>bin\x86\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x86</PlatformTarget>
<LangVersion>7.3</LangVersion>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>true</Prefer32Bit>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Net.Http" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="WatcherService.cs" />
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\ZeroLevel\ZeroLevel.csproj">
<Project>{06c9e60e-d449-41a7-9bf0-a829aaf5d214}</Project>
<Name>ZeroLevel</Name>
</ProjectReference>
<ProjectReference Include="..\..\ZeroLevel\ZeroLevel.csproj" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
</Project>

@ -1,381 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.Serialization;
using System.Text;
namespace LemmaSharp
{
[Serializable]
public class ExampleList : ISerializable
{
#region Private Variables
// Settings passed to the rule list and to every LemmaExample this list creates.
private LemmatizerSettings lsett;
// Rule list handed to each LemmaExample created through AddExample.
private RuleList rlRules;
// Examples keyed by LemmaExample.Signature; examples with an equal signature are joined into one entry.
private Dictionary<string, LemmaExample> dictExamples;
// Sorted copy of dictExamples.Values built by FinalizeAdditions; reset to null by Add and DropExamples.
private List<LemmaExample> lstExamples;
#endregion
#region Constructor(s)
/// <summary>
/// Creates an empty example list that uses the given settings.
/// </summary>
/// <param name="lsett">Settings stored on the list and passed to its new <see cref="RuleList"/>.</param>
public ExampleList(LemmatizerSettings lsett)
{
    this.lsett = lsett;

    // Start with an empty dictionary and no sorted list; the list is built on demand.
    dictExamples = new Dictionary<string, LemmaExample>();
    lstExamples = null;

    rlRules = new RuleList(lsett);
}
/// <summary>
/// Creates an example list and immediately fills it from a tab-separated text stream.
/// </summary>
/// <param name="srIn">Reader that supplies the example lines.</param>
/// <param name="sFormat">Column layout string understood by <see cref="AddMultextFile"/>.</param>
/// <param name="lsett">Settings stored on the list.</param>
public ExampleList(StreamReader srIn, string sFormat, LemmatizerSettings lsett)
    : this(lsett)
{
    this.AddMultextFile(srIn, sFormat);
}
#endregion
#region Public Properties & Indexers
/// <summary>
/// Gets the example at position <paramref name="i"/> of the sorted list,
/// building that list first if it does not exist yet.
/// </summary>
public LemmaExample this[int i]
{
    get
    {
        // FinalizeAdditions returns immediately when the sorted list already exists.
        FinalizeAdditions();
        return lstExamples[i];
    }
}
/// <summary>
/// Gets the number of examples in the sorted list, building that list first if needed.
/// </summary>
public int Count
{
    get
    {
        // FinalizeAdditions returns immediately when the sorted list already exists.
        FinalizeAdditions();
        return lstExamples.Count;
    }
}
/// <summary>
/// Gets the sum of <c>Weight</c> over all examples, added up in sorted-list order.
/// </summary>
public double WeightSum
{
    get
    {
        // FinalizeAdditions returns immediately when the sorted list already exists.
        FinalizeAdditions();

        double total = 0;
        for (int idx = 0; idx < lstExamples.Count; idx++)
        {
            total += lstExamples[idx].Weight;
        }
        return total;
    }
}
/// <summary>Gets the rule list owned by this example list.</summary>
public RuleList Rules => rlRules;
/// <summary>
/// Gets the sorted list of examples, building it from the dictionary first if needed.
/// The returned list is the internal instance, not a copy.
/// </summary>
public List<LemmaExample> ListExamples
{
    get
    {
        // FinalizeAdditions returns immediately when the sorted list already exists.
        FinalizeAdditions();
        return lstExamples;
    }
}
#endregion
#region Essential Class Functions (adding/removing examples)
/// <summary>
/// Reads tab-separated example lines from <paramref name="srIn"/> and adds one example
/// for every line that has enough columns.
/// </summary>
/// <param name="srIn">Reader that is consumed line by line.</param>
/// <param name="sFormat">
/// Column layout. The position of each letter in this string is the zero-based column it names:
/// 'W' = word (required), 'L' = lemma (required), 'M' = MSD (optional), 'F' = weight (optional).
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="srIn"/> or <paramref name="sFormat"/> is null.
/// </exception>
/// <exception cref="Exception">
/// The format has no 'W' or no 'L', or 50 lines had fewer columns than the format requires.
/// </exception>
public void AddMultextFile(StreamReader srIn, string sFormat)
{
    if (srIn == null)
        throw new ArgumentNullException(nameof(srIn));
    if (sFormat == null)
        throw new ArgumentNullException(nameof(sFormat));

    int iW = sFormat.IndexOf('W');
    int iL = sFormat.IndexOf('L');
    int iM = sFormat.IndexOf('M');
    int iF = sFormat.IndexOf('F');

    if (iW < 0 || iL < 0)
    {
        throw new Exception("Can not find word and lemma location in the format specification");
    }

    // A line is usable only if it reaches the right-most column named by the format.
    int iLen = Math.Max(Math.Max(iW, iL), Math.Max(iM, iF)) + 1;

    int iError = 0;
    string sLine;
    while ((sLine = srIn.ReadLine()) != null && iError < 50)
    {
        string[] asWords = sLine.Split('\t');
        if (asWords.Length < iLen)
        {
            // Line does not conform to the format: count it and move on.
            iError++;
            continue;
        }

        string sWord = asWords[iW];
        string sLemma = asWords[iL];

        // "=" in the lemma column means the lemma is the word itself.
        if (sLemma.Equals("=", StringComparison.Ordinal))
            sLemma = sWord;

        string sMsd = null;
        if (iM > -1)
            sMsd = asWords[iM];

        double dWeight = 1;
        if (iF > -1)
        {
            // Fix: the weight lives in the 'F' column. The previous code read the 'M' column,
            // which parsed the wrong field and threw IndexOutOfRangeException when the format
            // had 'F' but no 'M'.
            // NOTE(review): as before, an unparsable weight becomes 0 (Double.TryParse contract)
            // and parsing uses the current culture - confirm both are intended.
            Double.TryParse(asWords[iF], out dWeight);
        }

        AddExample(sWord, sLemma, dWeight, sMsd);
    }

    if (iError == 50)
        throw new Exception("Parsing stopped because of too many (50) errors. Check format specification");
}
/// <summary>
/// Builds a <see cref="LemmaExample"/> from the given values and adds it to the list.
/// </summary>
/// <param name="sWord">Word form.</param>
/// <param name="sLemma">Lemma of the word.</param>
/// <param name="dWeight">Weight of the example.</param>
/// <param name="sMsd">MSD value; replaced by null when the settings say to ignore MSDs.</param>
/// <returns>The example stored in the list: the new one, or the existing one it was joined into.</returns>
public LemmaExample AddExample(string sWord, string sLemma, double dWeight, string sMsd)
{
    string effectiveMsd = null;
    if (lsett.eMsdConsider != LemmatizerSettings.MsdConsideration.Ignore)
    {
        effectiveMsd = sMsd;
    }

    return Add(new LemmaExample(sWord, sLemma, dWeight, effectiveMsd, rlRules, lsett));
}
/// <summary>
/// Stores <paramref name="leNew"/> under its signature, or joins it into the example
/// already stored under that signature.
/// </summary>
/// <returns>The example that is in the dictionary after the call.</returns>
private LemmaExample Add(LemmaExample leNew)
{
    LemmaExample stored;
    if (dictExamples.TryGetValue(leNew.Signature, out stored))
    {
        stored.Join(leNew);
    }
    else
    {
        stored = leNew;
        dictExamples.Add(stored.Signature, stored);
    }

    // Any addition invalidates the sorted list; it is rebuilt by FinalizeAdditions.
    lstExamples = null;
    return stored;
}
/// <summary>
/// Removes every example: empties the dictionary and discards the sorted list.
/// The rule list is left as it is.
/// </summary>
public void DropExamples()
{
    this.dictExamples.Clear();
    this.lstExamples = null;
}
/// <summary>
/// Builds the sorted list from the dictionary values if it does not exist yet;
/// does nothing when it already exists.
/// </summary>
public void FinalizeAdditions()
{
    if (lstExamples == null)
    {
        lstExamples = new List<LemmaExample>(dictExamples.Values);
        lstExamples.Sort();
    }
}
/// <summary>
/// Creates a new, finalized example list from either the front or the rear
/// word/lemma pair of every example in this list.
/// </summary>
/// <param name="front">
/// true to use <c>WordFront</c>/<c>LemmaFront</c>; false to use <c>WordRear</c>/<c>LemmaRear</c>.
/// </param>
/// <returns>A new list that uses the same settings, with weight and MSD copied from each source example.</returns>
public ExampleList GetFrontRearExampleList(bool front)
{
    var result = new ExampleList(lsett);

    foreach (var source in ListExamples)
    {
        var word = front ? source.WordFront : source.WordRear;
        var lemma = front ? source.LemmaFront : source.LemmaRear;
        result.AddExample(word, lemma, source.Weight, source.Msd);
    }

    result.FinalizeAdditions();
    return result;
}
#endregion
#region Output Functions (ToString)
/// <summary>
/// Returns the text of every example, one per line, in sorted-list order.
/// </summary>
/// <returns>Each example's <c>ToString()</c> followed by a line break; empty when there are no examples.</returns>
public override string ToString()
{
    var sb = new StringBuilder();

    // Fix: go through ListExamples, which builds the sorted list on demand.
    // Reading lstExamples directly threw NullReferenceException whenever the
    // list had been invalidated by Add or DropExamples.
    foreach (var exm in ListExamples)
    {
        sb.AppendLine(exm.ToString());
    }

    return sb.ToString();
}
#endregion
#region Serialization Functions (.Net Default - ISerializable)
/// <summary>
/// Writes the settings and the examples into <paramref name="info"/> as four parallel
/// arrays: words, lemmas, weights and MSDs.
/// </summary>
/// <param name="info">Destination for the serialized values.</param>
/// <param name="context">Not used.</param>
public void GetObjectData(SerializationInfo info, StreamingContext context)
{
    info.AddValue("lsett", lsett);

    int total = dictExamples.Count;
    info.AddValue("iNumExamples", total);

    var words = new string[total];
    var lemmas = new string[total];
    var weights = new double[total];
    var msds = new string[total];

    // Flatten each example into the same slot of all four arrays.
    int slot = 0;
    foreach (var example in dictExamples.Values)
    {
        words[slot] = example.Word;
        lemmas[slot] = example.Lemma;
        weights[slot] = example.Weight;
        msds[slot] = example.Msd;
        slot++;
    }

    info.AddValue("aWords", words);
    info.AddValue("aLemmas", lemmas);
    info.AddValue("aWeights", weights);
    info.AddValue("aMsds", msds);
}
/// <summary>
/// Deserialization constructor: restores the settings, then re-adds every example
/// from the parallel arrays written by <see cref="GetObjectData"/>.
/// </summary>
/// <param name="info">Source of the serialized values.</param>
/// <param name="context">Not used.</param>
public ExampleList(SerializationInfo info, StreamingContext context)
{
    lsett = (LemmatizerSettings)info.GetValue("lsett", typeof(LemmatizerSettings));

    dictExamples = new Dictionary<string, LemmaExample>();
    lstExamples = null;
    rlRules = new RuleList(lsett);

    var words = (string[])info.GetValue("aWords", typeof(string[]));
    var lemmas = (string[])info.GetValue("aLemmas", typeof(string[]));
    var weights = (double[])info.GetValue("aWeights", typeof(double[]));
    var msds = (string[])info.GetValue("aMsds", typeof(string[]));

    // The word array decides how many examples are restored.
    int index = 0;
    while (index < words.Length)
    {
        AddExample(words[index], lemmas[index], weights[index], msds[index]);
        index++;
    }
}
#endregion
#region Serialization Functions (Binary)
/// <summary>
/// Writes this list to <paramref name="binWrt"/> in the layout read back by <see cref="Deserialize"/>:
/// top-object flag, settings (top object only), rules, "sorted list exists" flag, example count,
/// then for each example its rule signature followed by the example itself.
/// </summary>
/// <param name="binWrt">Destination writer.</param>
/// <param name="bSerializeExamples">false to write an empty example section (flag false, count 0).</param>
/// <param name="bThisTopObject">true to also write the settings.</param>
public void Serialize(BinaryWriter binWrt, bool bSerializeExamples, bool bThisTopObject)
{
    // Metadata first, then reference types if this is the top object.
    binWrt.Write(bThisTopObject);
    if (bThisTopObject)
    {
        lsett.Serialize(binWrt);
    }
    rlRules.Serialize(binWrt, false);

    if (!bSerializeExamples)
    {
        binWrt.Write(false); // same as: no sorted list
        binWrt.Write(0);     // same as: no examples
        return;
    }

    // Write from the sorted list when it exists, otherwise from the dictionary values.
    bool hasSortedList = lstExamples != null;
    binWrt.Write(hasSortedList);

    ICollection<LemmaExample> examples = hasSortedList
        ? (ICollection<LemmaExample>)lstExamples
        : dictExamples.Values;

    binWrt.Write(examples.Count);
    foreach (var example in examples)
    {
        binWrt.Write(example.Rule.Signature);
        example.Serialize(binWrt, false);
    }
}
/// <summary>
/// Reads the state written by Serialize: settings (own or supplied), rule list,
/// and the examples, which are always put in the dictionary and additionally in
/// the list when the stream says a list existed.
/// </summary>
/// <param name="binRead">Source reader.</param>
/// <param name="lsett">Settings to use when the stream does not carry its own.</param>
public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett)
{
    // metadata
    bool bOwnSettings = binRead.ReadBoolean();

    // reference types, if needed
    this.lsett = bOwnSettings ? new LemmatizerSettings(binRead) : lsett;
    rlRules = new RuleList(binRead, this.lsett);

    bool bKeepList = binRead.ReadBoolean();
    lstExamples = bKeepList ? new List<LemmaExample>() : null;
    dictExamples = new Dictionary<string, LemmaExample>();

    // examples: rule signature first, then the example payload
    int iTotal = binRead.ReadInt32();
    for (int iRead = 0; iRead < iTotal; iRead++)
    {
        string sRuleSignature = binRead.ReadString();
        var example = new LemmaExample(binRead, this.lsett, rlRules[sRuleSignature]);
        dictExamples.Add(example.Signature, example);
        if (bKeepList)
        {
            lstExamples.Add(example);
        }
    }
}
/// <summary>
/// Creates an example list by reading it from a binary stream (see Deserialize).
/// </summary>
/// <param name="binRead">Source reader.</param>
/// <param name="lsett">Settings to use when the stream does not carry its own.</param>
public ExampleList(BinaryReader binRead, LemmatizerSettings lsett)
{
    this.Deserialize(binRead, lsett);
}
#endregion
#region Serialization Functions (Latino)
#if LATINO
// Latino counterpart of Serialize: same field order, written through Latino.BinarySerializer.
// Only compiled when the LATINO symbol is defined.
public void Save(Latino.BinarySerializer binWrt, bool bSerializeExamples, bool bThisTopObject) {
//save metadata
binWrt.WriteBool(bThisTopObject);
//save reference types if needed -------------------------
if (bThisTopObject)
lsett.Save(binWrt);
rlRules.Save(binWrt, false);
if (!bSerializeExamples) {
binWrt.WriteBool(false); // lstExamples == null
binWrt.WriteInt(0); // dictExamples.Count == 0
}
else {
if (lstExamples == null) {
binWrt.WriteBool(false); // lstExamples == null
//save dictionary items
int iCount = dictExamples.Count;
binWrt.WriteInt(iCount);
foreach (KeyValuePair<string, LemmaExample> kvp in dictExamples) {
// each example is preceded by the signature of its rule
binWrt.WriteString(kvp.Value.Rule.Signature);
kvp.Value.Save(binWrt, false);
}
}
else {
binWrt.WriteBool(true); // lstExamples != null
//save list & dictionary items
int iCount = lstExamples.Count;
binWrt.WriteInt(iCount);
foreach (LemmaExample le in lstExamples) {
// each example is preceded by the signature of its rule
binWrt.WriteString(le.Rule.Signature);
le.Save(binWrt, false);
}
}
}
}
// Latino counterpart of Deserialize: reads what Save wrote, in the same order.
public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
//load metadata
bool bThisTopObject = binRead.ReadBool();
//load reference types if needed -------------------------
if (bThisTopObject)
this.lsett = new LemmatizerSettings(binRead);
else
this.lsett = lsett;
rlRules = new RuleList(binRead, this.lsett);
// the list is only recreated when the stream says one existed at save time
bool bCreateLstExamples = binRead.ReadBool();
lstExamples = bCreateLstExamples ? new List<LemmaExample>() : null;
dictExamples = new Dictionary<string, LemmaExample>();
//load dictionary items
int iCount = binRead.ReadInt();
for (int iId = 0; iId < iCount; iId++) {
// the rule is looked up by the signature stored in front of the example
LemmaRule lrRule = rlRules[binRead.ReadString()];
LemmaExample le = new LemmaExample(binRead, this.lsett, lrRule);
dictExamples.Add(le.Signature, le);
if (bCreateLstExamples) lstExamples.Add(le);
}
}
// Creates an example list by loading it from a Latino stream (see Load).
public ExampleList(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
Load(binRead, lsett);
}
#endif
#endregion
}
}

@ -1,481 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace LemmaSharp
{
public class LemmaExample : IComparable<LemmaExample>, IComparer<LemmaExample>
{
#region Private Variables
private string sWord;
private string sLemma;
private string sSignature;
private string sMsd;
private double dWeight;
private LemmaRule lrRule;
private LemmatizerSettings lsett;
private string sWordRearCache;
private string sWordFrontCache;
private string sLemmaFrontCache;
#endregion
#region Constructor(s)
/// <summary>
/// Creates an example (word, lemma, weight, optional MSD), registers its rule in
/// <paramref name="rlRules"/> and computes the example signature.
/// The MSD is part of the signature only for MsdConsideration.Distinct (and unknown values).
/// </summary>
public LemmaExample(string sWord, string sLemma, double dWeight, string sMsd, RuleList rlRules, LemmatizerSettings lsett)
{
    this.lsett = lsett;
    this.sWord = sWord;
    this.sLemma = sLemma;
    this.sMsd = sMsd;
    this.dWeight = dWeight;

    // The rule is registered before the signature is computed, as in the original order.
    this.lrRule = rlRules.AddRule(this);

    bool bMsdInSignature;
    switch (lsett.eMsdConsider)
    {
        case LemmatizerSettings.MsdConsideration.Ignore:
        case LemmatizerSettings.MsdConsideration.JoinAll:
        case LemmatizerSettings.MsdConsideration.JoinDistinct:
        case LemmatizerSettings.MsdConsideration.JoinSameSubstring:
            bMsdInSignature = false;
            break;
        default: // includes MsdConsideration.Distinct
            bMsdInSignature = true;
            break;
    }

    if (bMsdInSignature)
        sSignature = string.Format("[{0}]==>[{1}]({2})", sWord, sLemma, sMsd ?? "");
    else
        sSignature = string.Format("[{0}]==>[{1}]", sWord, sLemma);

    // Derived strings are computed lazily by the corresponding properties.
    sWordRearCache = null;
    sWordFrontCache = null;
    sLemmaFrontCache = null;
}
#endregion
#region Public Properties
/// <summary>Inflected word form of this example.</summary>
public string Word => sWord;

/// <summary>Lemma of this example.</summary>
public string Lemma => sLemma;

/// <summary>MSD string of this example; may be null.</summary>
public string Msd => sMsd;

/// <summary>Signature string computed for this example.</summary>
public string Signature => sSignature;

/// <summary>Weight of this example.</summary>
public double Weight => dWeight;

/// <summary>Rule associated with this example.</summary>
public LemmaRule Rule => lrRule;
/// <summary>
/// Word to be pre-lemmatized by the Front-Lemmatizer into LemmaFront, which is then
/// lemmatized by the standard Rear-Lemmatizer. Warning: the string is reversed.
/// The value is computed on first access and cached.
/// </summary>
public string WordFront
{
    get { return sWordFrontCache ?? (sWordFrontCache = StringReverse(sWord)); }
}
/// <summary>
/// Lemma to be produced by pre-lemmatizing with the Front-Lemmatizer: the reversed WordRear.
/// Warning: the string is reversed. The value is computed on first access and cached.
/// </summary>
public string LemmaFront
{
    get { return sLemmaFrontCache ?? (sLemmaFrontCache = StringReverse(WordRear)); }
}
/// <summary>
/// Word to be lemmatized by the standard Rear-Lemmatizer (its beginning has already been
/// modified by the Front-Lemmatizer). Built from the lemma up to the end of the longest
/// common substring of word and lemma, followed by the rest of the word. When word and
/// lemma share nothing, the lemma itself is used. Computed on first access and cached.
/// </summary>
public string WordRear
{
    get
    {
        if (sWordRearCache != null)
            return sWordRearCache;

        int iLemmaStart = 0, iWordStart = 0;
        var sShared = LongestCommonSubstring(sWord, sLemma, ref iWordStart, ref iLemmaStart);
        if (iLemmaStart == -1)
        {
            sWordRearCache = sLemma;
        }
        else
        {
            string sLemmaHead = sLemma.Substring(0, iLemmaStart + sShared.Length);
            string sWordTail = sWord.Substring(iWordStart + sShared.Length);
            sWordRearCache = sLemmaHead + sWordTail;
        }
        return sWordRearCache;
    }
}
/// <summary>
/// Lemma to be produced by the standard Rear-Lemmatizer from WordRear (the plain lemma).
/// </summary>
public string LemmaRear => sLemma;
#endregion
#region Essential Class Functions (joining two examples into one)
// TODO - this function is not entirely correct because sMsd should not be
// changed, since it may be included in the signature.
/// <summary>
/// Merges another example into this one: the weights are summed and, when this
/// example has an MSD, the MSD is combined according to the configured MsdConsideration.
/// </summary>
/// <remarks>
/// Ignore clears the MSD; Distinct leaves it unchanged; JoinAll appends "|" + other MSD;
/// JoinDistinct appends the other MSD only when it is not already one of the
/// '|'-separated entries; JoinSameSubstring keeps the common prefix of both MSDs.
/// A null MSD on <paramref name="leJoin"/> is treated as an empty string.
/// </remarks>
/// <param name="leJoin">Example whose weight and MSD are merged into this one.</param>
public void Join(LemmaExample leJoin)
{
    dWeight += leJoin.dWeight;
    if (sMsd == null)
        return;

    // Null-safe view of the other MSD ("|" + null and "|" + "" produce the same text).
    string sOther = leJoin.sMsd ?? string.Empty;

    switch (lsett.eMsdConsider)
    {
        case LemmatizerSettings.MsdConsideration.Ignore:
            sMsd = null;
            break;
        case LemmatizerSettings.MsdConsideration.Distinct:
            break;
        case LemmatizerSettings.MsdConsideration.JoinAll:
            sMsd += "|" + sOther;
            break;
        case LemmatizerSettings.MsdConsideration.JoinDistinct:
            // Compare whole '|'-separated entries. A substring search for "|X" misses the
            // first entry and wrongly matches entries that merely start with X.
            if (Array.IndexOf(sMsd.Split('|'), sOther) < 0)
                sMsd += "|" + sOther;
            break;
        case LemmatizerSettings.MsdConsideration.JoinSameSubstring:
            // Keep only the leading characters both MSDs have in common.
            int iPos = 0;
            int iMax = Math.Min(sMsd.Length, sOther.Length);
            while (iPos < iMax && sMsd[iPos] == sOther[iPos])
                iPos++;
            sMsd = sMsd.Substring(0, iPos);
            break;
        default:
            break;
    }
}
#endregion
#region Essential Class Functions (calculating similarities betwen examples)
/// <summary>
/// Similarity between this example and <paramref name="le"/>; see the static overload.
/// </summary>
public int Similarity(LemmaExample le)
{
    return Similarity(this, le);
}

/// <summary>
/// Number of trailing characters the words of both examples have in common
/// (length of the shared suffix).
/// </summary>
public static int Similarity(LemmaExample le1, LemmaExample le2)
{
    string sFirst = le1.sWord;
    string sSecond = le2.sWord;
    int iLimit = Math.Min(sFirst.Length, sSecond.Length);

    // Walk backwards from the last character while the characters agree.
    int iMatched = 0;
    while (iMatched < iLimit
        && sFirst[sFirst.Length - 1 - iMatched] == sSecond[sSecond.Length - 1 - iMatched])
    {
        iMatched++;
    }

    // TODO: similarity should be bigger (iLimit + 1) when the two words are totally equal.
    return iMatched;
}
#endregion
#region Essential Class Functions (comparing examples - eg.: for sorting)
/// <summary>
/// Compares this example against <paramref name="other"/>; mainly used for sorting.
/// Order: word compared from its end, then lemma compared from its start, then
/// (only for MsdConsideration.Distinct and when both MSDs are set) the MSD.
/// </summary>
/// <param name="other">Example that the current example is compared against.</param>
/// <returns>1 if this example is bigger, -1 if smaller and 0 if both are the same.</returns>
public int CompareTo(LemmaExample other)
{
    int iResult = CompareStrings(sWord, other.sWord, false);

    if (iResult == 0)
        iResult = CompareStrings(sLemma, other.sLemma, true);

    if (iResult == 0
        && lsett.eMsdConsider == LemmatizerSettings.MsdConsideration.Distinct
        && sMsd != null
        && other.sMsd != null)
    {
        iResult = CompareStrings(sMsd, other.sMsd, true);
    }

    return iResult;
}

/// <summary>
/// IComparer implementation; delegates to <c>x.CompareTo(y)</c>.
/// </summary>
public int Compare(LemmaExample x, LemmaExample y)
{
    int iOrder = x.CompareTo(y);
    return iOrder;
}
/// <summary>
/// Ordinal, character-by-character comparison of two strings, either from the start
/// (<paramref name="bForward"/> true) or from the end (false). When one string is
/// exhausted without a difference, the longer string is the bigger one.
/// </summary>
/// <returns>1 if <paramref name="sStr1"/> is bigger, -1 if smaller, 0 if equal.</returns>
public static int CompareStrings(string sStr1, string sStr2, bool bForward)
{
    int iLen1 = sStr1.Length;
    int iLen2 = sStr2.Length;
    int iShared = Math.Min(iLen1, iLen2);

    for (int iStep = 0; iStep < iShared; iStep++)
    {
        // Pick the iStep-th character counted from the chosen end of each string.
        char ch1 = bForward ? sStr1[iStep] : sStr1[iLen1 - 1 - iStep];
        char ch2 = bForward ? sStr2[iStep] : sStr2[iLen2 - 1 - iStep];
        if (ch1 != ch2)
            return ch1 > ch2 ? 1 : -1;
    }

    if (iLen1 == iLen2)
        return 0;
    return iLen1 > iLen2 ? 1 : -1;
}
/// <summary>
/// Returns the number of leading characters that are identical in both strings.
/// </summary>
public static int EqualPrifixLen(string sStr1, string sStr2)
{
    int iLimit = Math.Min(sStr1.Length, sStr2.Length);
    int iSame = 0;
    while (iSame < iLimit && sStr1[iSame] == sStr2[iSame])
    {
        iSame++;
    }
    return iSame;
}
/// <summary>
/// Finds the longest substring contained in both strings using a dynamic-programming table.
/// When several substrings share the maximum length, the first one found while scanning
/// <paramref name="sStr1"/> (outer loop) and <paramref name="sStr2"/> (inner loop) wins.
/// </summary>
/// <param name="sStr1">First string.</param>
/// <param name="sStr2">Second string.</param>
/// <param name="iPosInStr1">Receives the start of the match in <paramref name="sStr1"/>, or -1 when there is none.</param>
/// <param name="iPosInStr2">Receives the start of the match in <paramref name="sStr2"/>, or -1 when there is none.</param>
/// <returns>The common substring, or an empty string when nothing is shared.</returns>
public static string LongestCommonSubstring(string sStr1, string sStr2, ref int iPosInStr1, ref int iPosInStr2)
{
    // aiRun[r, c] = length of the common run ending at sStr1[r - 1] and sStr2[c - 1];
    // row 0 and column 0 stay zero so no border checks are needed.
    var aiRun = new int[sStr1.Length + 1, sStr2.Length + 1];
    int iBest = 0;
    int iBestStart1 = -1;
    iPosInStr1 = -1;
    iPosInStr2 = -1;

    for (int iRow = 1; iRow <= sStr1.Length; iRow++)
    {
        char chRow = sStr1[iRow - 1];
        for (int iCol = 1; iCol <= sStr2.Length; iCol++)
        {
            if (chRow != sStr2[iCol - 1])
                continue;

            int iRun = aiRun[iRow - 1, iCol - 1] + 1;
            aiRun[iRow, iCol] = iRun;
            if (iRun <= iBest)
                continue;

            // Strictly longer run: remember it and where it starts in both strings.
            iBest = iRun;
            iBestStart1 = iRow - iRun;
            iPosInStr1 = iBestStart1;
            iPosInStr2 = iCol - iRun;
        }
    }

    return iBest == 0 ? "" : sStr1.Substring(iBestStart1, iBest);
}
/// <summary>
/// Returns <paramref name="s"/> with its characters (UTF-16 code units) in reverse order.
/// </summary>
/// <param name="s">String to reverse; may be null.</param>
/// <returns>The reversed string, or null when <paramref name="s"/> is null.</returns>
public static string StringReverse(string s)
{
    if (s == null)
        return null;

    // Copy and reverse in place. The previous hand-written swap started from the middle
    // index and left most positions (including the centre of odd-length strings) as '\0'.
    var charArray = s.ToCharArray();
    Array.Reverse(charArray);
    return new string(charArray);
}
#endregion
#region Output Functions (ToString)
/// <summary>
/// Builds a one-line description of the example: word (W), lemma (L), MSD (M),
/// weight (F, omitted when NaN) and rule (R); parts whose value is missing are skipped.
/// </summary>
public override string ToString()
{
    var sbText = new StringBuilder();

    if (sWord != null) sbText.AppendFormat("W:\"{0}\" ", sWord);
    if (sLemma != null) sbText.AppendFormat("L:\"{0}\" ", sLemma);
    if (sMsd != null) sbText.AppendFormat("M:\"{0}\" ", sMsd);
    if (!double.IsNaN(dWeight)) sbText.AppendFormat("F:\"{0}\" ", dWeight);
    if (lrRule != null) sbText.AppendFormat("R:{0} ", lrRule);

    if (sbText.Length == 0)
        return string.Empty;

    // Every part ends with a blank; drop the trailing one.
    return sbText.ToString(0, sbText.Length - 1);
}
#endregion
#region Serialization Functions (Binary)
/// <summary>
/// Writes this example to a binary stream: top-object flag, word, lemma, signature,
/// MSD presence flag (+ MSD when present), weight, and - only for a top object -
/// the settings and the rule.
/// </summary>
/// <param name="binWrt">Destination writer.</param>
/// <param name="bThisTopObject">When true, settings and rule are written with the example.</param>
public void Serialize(BinaryWriter binWrt, bool bThisTopObject)
{
    // metadata
    binWrt.Write(bThisTopObject);

    // value types
    binWrt.Write(sWord);
    binWrt.Write(sLemma);
    binWrt.Write(sSignature);

    bool bHasMsd = sMsd != null;
    binWrt.Write(bHasMsd);
    if (bHasMsd)
        binWrt.Write(sMsd);

    binWrt.Write(dWeight);

    // reference types, only when this object is the root of the serialization
    if (!bThisTopObject)
        return;
    lsett.Serialize(binWrt);
    lrRule.Serialize(binWrt, false);
}
/// <summary>
/// Reads the state written by Serialize. Settings and rule are read from the stream
/// when the example was saved as a top object; otherwise the supplied ones are used.
/// The cached derived strings are cleared.
/// </summary>
/// <param name="binRead">Source reader.</param>
/// <param name="lsett">Settings used when the stream carries none.</param>
/// <param name="lrRule">Rule used when the stream carries none.</param>
public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, LemmaRule lrRule)
{
    // metadata
    bool bOwnReferences = binRead.ReadBoolean();

    // value types
    sWord = binRead.ReadString();
    sLemma = binRead.ReadString();
    sSignature = binRead.ReadString();
    sMsd = binRead.ReadBoolean() ? binRead.ReadString() : null;
    dWeight = binRead.ReadDouble();

    // reference types
    if (!bOwnReferences)
    {
        this.lsett = lsett;
        this.lrRule = lrRule;
    }
    else
    {
        this.lsett = new LemmatizerSettings(binRead);
        this.lrRule = new LemmaRule(binRead, this.lsett);
    }

    // lazily computed values must be rebuilt for the new content
    sWordRearCache = null;
    sWordFrontCache = null;
    sLemmaFrontCache = null;
}
/// <summary>
/// Creates an example by reading it from a binary stream (see Deserialize).
/// </summary>
public LemmaExample(BinaryReader binRead, LemmatizerSettings lsett, LemmaRule lrRule)
{
    this.Deserialize(binRead, lsett, lrRule);
}
#endregion
#region Serialization Functions (Latino)
#if LATINO
// Latino counterpart of Serialize: same field order, written through Latino.BinarySerializer.
// Only compiled when the LATINO symbol is defined.
public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject) {
//save metadata
binWrt.WriteBool(bThisTopObject);
//save value types --------------------------------------
binWrt.WriteString(sWord);
binWrt.WriteString(sLemma);
binWrt.WriteString(sSignature);
// the MSD is optional: a presence flag is written first
if (sMsd == null)
binWrt.WriteBool(false);
else {
binWrt.WriteBool(true);
binWrt.WriteString(sMsd);
}
binWrt.WriteDouble(dWeight);
//save reference types if needed -------------------------
if (bThisTopObject) {
lsett.Save(binWrt);
lrRule.Save(binWrt, false);
}
}
// Latino counterpart of Deserialize: reads what Save wrote, in the same order.
// NOTE(review): unlike Deserialize, this method does not reset sWordRearCache,
// sWordFrontCache and sLemmaFrontCache - confirm whether Load can be called on an
// instance whose caches are already populated.
public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett, LemmaRule lrRule) {
//load metadata
bool bThisTopObject = binRead.ReadBool();
//load value types --------------------------------------
sWord = binRead.ReadString();
sLemma = binRead.ReadString();
sSignature = binRead.ReadString();
if (binRead.ReadBool())
sMsd = binRead.ReadString();
else
sMsd = null;
dWeight = binRead.ReadDouble();
//load reference types if needed -------------------------
if (bThisTopObject) {
this.lsett = new LemmatizerSettings(binRead);
this.lrRule = new LemmaRule(binRead, this.lsett);
}
else {
this.lsett = lsett;
this.lrRule = lrRule;
}
}
// Creates an example by loading it from a Latino stream (see Load).
public LemmaExample(Latino.BinarySerializer binRead, LemmatizerSettings lsett, LemmaRule lrRule) {
Load(binRead, lsett, lrRule);
}
#endif
#endregion
}
}

@ -1,189 +0,0 @@
using System;
using System.IO;
namespace LemmaSharp
{
public class LemmaRule
{
#region Private Variables
private int iId;
private int iFrom;
private string sFrom;
private string sTo;
private string sSignature;
private LemmatizerSettings lsett;
#endregion
#region Constructor(s)
/// <summary>
/// Derives the transformation rule that turns <paramref name="sWord"/> into
/// <paramref name="sLemma"/>: strip the word ending after the common prefix
/// (iFrom characters) and append the lemma ending (sTo).
/// With bUseFromInRules the stripped ending itself is part of the rule and its signature;
/// otherwise only its length is.
/// </summary>
public LemmaRule(string sWord, string sLemma, int iId, LemmatizerSettings lsett)
{
    this.lsett = lsett;
    this.iId = iId;

    int iStemLength = SameStem(sWord, sLemma);
    sTo = sLemma.Substring(iStemLength);
    iFrom = sWord.Length - iStemLength;

    if (!lsett.bUseFromInRules)
    {
        sFrom = null;
        sSignature = string.Format("[#{0}]==>[{1}]", iFrom, sTo);
        return;
    }

    sFrom = sWord.Substring(iStemLength);
    sSignature = string.Format("[{0}]==>[{1}]", sFrom, sTo);
}
#endregion
#region Public Properties
/// <summary>Signature string of this rule.</summary>
public string Signature => sSignature;

/// <summary>Identifier given to this rule at construction.</summary>
public int Id => iId;
#endregion
#region Essential Class Functions
/// <summary>
/// Returns the length of the common prefix (stem) of the two strings.
/// </summary>
private static int SameStem(string sStr1, string sStr2)
{
    int iLimit = Math.Min(sStr1.Length, sStr2.Length);
    int iStem = 0;
    while (iStem < iLimit && sStr1[iStem] == sStr2[iStem])
    {
        iStem++;
    }
    return iStem;
}
/// <summary>
/// True when the number of characters this rule strips (iFrom) does not exceed
/// the given group condition length.
/// </summary>
public bool IsApplicableToGroup(int iGroupCondLen)
{
    return iFrom <= iGroupCondLen;
}

/// <summary>
/// Applies the rule: removes the last iFrom characters of <paramref name="sWord"/>
/// and appends the rule's replacement ending.
/// </summary>
public string Lemmatize(string sWord)
{
    int iKeep = sWord.Length - iFrom;
    string sStem = sWord.Substring(0, iKeep);
    return sStem + sTo;
}
#endregion
#region Output Functions (ToString)
/// <summary>
/// Returns the rule as "id:signature".
/// </summary>
public override string ToString()
{
    string sText = string.Format("{0}:{1}", iId, sSignature);
    return sText;
}
#endregion
#region Serialization Functions (Binary)
/// <summary>
/// Writes this rule to a binary stream: top-object flag, id, strip length,
/// presence flag for the stripped ending (+ the ending when present), replacement
/// ending, signature, and - only for a top object - the settings.
/// </summary>
/// <param name="binWrt">Destination writer.</param>
/// <param name="bThisTopObject">When true, the settings are written with the rule.</param>
public void Serialize(BinaryWriter binWrt, bool bThisTopObject)
{
    // metadata
    binWrt.Write(bThisTopObject);

    // value types
    binWrt.Write(iId);
    binWrt.Write(iFrom);

    bool bHasFrom = sFrom != null;
    binWrt.Write(bHasFrom);
    if (bHasFrom)
    {
        binWrt.Write(sFrom);
    }

    binWrt.Write(sTo);
    binWrt.Write(sSignature);

    // reference types
    if (bThisTopObject)
    {
        lsett.Serialize(binWrt);
    }
}
/// <summary>
/// Reads the state written by Serialize. The settings are read from the stream when
/// the rule was saved as a top object; otherwise the supplied settings are used.
/// </summary>
/// <param name="binRead">Source reader.</param>
/// <param name="lsett">Settings used when the stream carries none.</param>
public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett)
{
    // metadata
    bool bOwnSettings = binRead.ReadBoolean();

    // value types
    iId = binRead.ReadInt32();
    iFrom = binRead.ReadInt32();
    sFrom = binRead.ReadBoolean() ? binRead.ReadString() : null;
    sTo = binRead.ReadString();
    sSignature = binRead.ReadString();

    // reference types
    this.lsett = bOwnSettings ? new LemmatizerSettings(binRead) : lsett;
}
/// <summary>
/// Creates a rule by reading it from a binary stream (see Deserialize).
/// </summary>
public LemmaRule(System.IO.BinaryReader binRead, LemmatizerSettings lsett)
{
    Deserialize(binRead, lsett);
}
#endregion
#region Serialization Functions (Latino)
#if LATINO
// Latino counterpart of Serialize: same field order, written through Latino.BinarySerializer.
// Only compiled when the LATINO symbol is defined.
public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject) {
//save metadata
binWrt.WriteBool(bThisTopObject);
//save value types --------------------------------------
binWrt.WriteInt(iId);
binWrt.WriteInt(iFrom);
// sFrom is optional: a presence flag is written first
if (sFrom == null)
binWrt.WriteBool(false);
else {
binWrt.WriteBool(true);
binWrt.WriteString(sFrom);
}
binWrt.WriteString(sTo);
binWrt.WriteString(sSignature);
// settings are only stored when this rule is the root of the serialization
if (bThisTopObject)
lsett.Save(binWrt);
}
// Latino counterpart of Deserialize: reads what Save wrote, in the same order.
public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
//load metadata
bool bThisTopObject = binRead.ReadBool();
//load value types --------------------------------------
iId = binRead.ReadInt();
iFrom = binRead.ReadInt();
if (binRead.ReadBool())
sFrom = binRead.ReadString();
else
sFrom = null;
sTo = binRead.ReadString();
sSignature = binRead.ReadString();
//load reference types if needed -------------------------
if (bThisTopObject)
this.lsett = new LemmatizerSettings(binRead);
else
this.lsett = lsett;
}
// Creates a rule by loading it from a Latino stream (see Load).
public LemmaRule(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
Load(binRead, lsett);
}
#endif
#endregion
}
}

@ -1,478 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace LemmaSharp
{
[Serializable]
public class LemmaTreeNode : ILemmatizerModel
{
#region Private Variables
//settings
private LemmatizerSettings lsett;
//tree structure references
private Dictionary<char, LemmaTreeNode> dictSubNodes;
private LemmaTreeNode ltnParentNode;
//essential node properties
private int iSimilarity; //similarity among all words in this node
private string sCondition; //suffix that must match in order to lemmatize
private bool bWholeWord; //true if condition has to match to whole word
//rules and weights;
private LemmaRule lrBestRule; //the best rule to be applied when lemmatizing
private RuleWeighted[] aBestRules; //list of best rules
private double dWeight;
//source of this node
private int iStart;
private int iEnd;
private ExampleList elExamples;
#endregion
#region Constructor(s) & Destructor(s)
/// <summary>
/// Creates a bare node that only knows its settings; used as the base of the other constructors.
/// </summary>
private LemmaTreeNode(LemmatizerSettings lsett)
{
    this.lsett = lsett;
}

/// <summary>
/// Builds the root node (and, recursively, the whole tree) over all examples of
/// <paramref name="elExamples"/>.
/// </summary>
public LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples)
    : this(lsett, elExamples, iStart: 0, iEnd: elExamples.Count - 1, ltnParentNode: null)
{
}
/// <summary>
/// Builds a node for the examples with indices iStart..iEnd and recursively builds its
/// sub-nodes. An invalid range yields a leaf that uses the default rule with weight 0.
/// </summary>
/// <param name="lsett">Lemmatizer settings shared by the whole tree</param>
/// <param name="elExamples">Example list the node is built from</param>
/// <param name="iStart">Index of the first word of the current group</param>
/// <param name="iEnd">Index of the last word of the current group</param>
/// <param name="ltnParentNode">Parent node, or null for the root</param>
private LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples, int iStart, int iEnd, LemmaTreeNode ltnParentNode) : this(lsett)
{
this.ltnParentNode = ltnParentNode;
this.dictSubNodes = null;
this.iStart = iStart;
this.iEnd = iEnd;
this.elExamples = elExamples;
// empty or out-of-range group: fall back to the default rule and stop
if (iStart >= elExamples.Count || iEnd >= elExamples.Count || iStart > iEnd)
{
lrBestRule = elExamples.Rules.DefaultRule;
aBestRules = new RuleWeighted[1];
aBestRules[0] = new RuleWeighted(lrBestRule, 0);
dWeight = 0;
return;
}
// condition = suffix of the first word, one character longer than the parent's
// similarity (0 for the root), capped at the word length
int iConditionLength = Math.Min(ltnParentNode == null ? 0 : ltnParentNode.iSimilarity + 1, elExamples[iStart].Word.Length);
this.sCondition = elExamples[iStart].Word.Substring(elExamples[iStart].Word.Length - iConditionLength);
// similarity is measured between the first and the last example of the group
this.iSimilarity = elExamples[iStart].Similarity(elExamples[iEnd]);
// whole-word node: the last word is exactly as long as the parent's similarity
this.bWholeWord = ltnParentNode == null ? false : elExamples[iEnd].Word.Length == ltnParentNode.iSimilarity;
FindBestRules();
AddSubAll();
//TODO check this heuristics, can be problematic when there are more applicable rules
// collapse pass: a child that has exactly one sub-node and the same best rule as this
// node is replaced by that single sub-node
if (dictSubNodes != null)
{
// replacements are collected first because the dictionary cannot be modified while enumerating
var lReplaceNodes = new List<KeyValuePair<char, LemmaTreeNode>>();
foreach (var kvpChild in dictSubNodes)
if (kvpChild.Value.dictSubNodes != null && kvpChild.Value.dictSubNodes.Count == 1)
{
// fetch the only grandchild
var enumChildChild = kvpChild.Value.dictSubNodes.Values.GetEnumerator();
enumChildChild.MoveNext();
var ltrChildChild = enumChildChild.Current;
if (kvpChild.Value.lrBestRule == lrBestRule)
lReplaceNodes.Add(new KeyValuePair<char, LemmaTreeNode>(kvpChild.Key, ltrChildChild));
}
foreach (var kvpChild in lReplaceNodes)
{
dictSubNodes[kvpChild.Key] = kvpChild.Value;
kvpChild.Value.ltnParentNode = this;
}
}
}
#endregion
#region Public Properties
/// <summary>
/// Number of nodes in the subtree rooted at this node (this node included).
/// </summary>
public int TreeSize
{
    get
    {
        if (dictSubNodes == null)
            return 1;

        int iTotal = 1;
        foreach (var kvpChild in dictSubNodes)
            iTotal += kvpChild.Value.TreeSize;
        return iTotal;
    }
}

/// <summary>Weight stored for this node.</summary>
public double Weight => dWeight;
#endregion
#region Essential Class Functions (building model)
/// <summary>
/// Computes the weight of this node (sum of the weights of examples iStart..iEnd) and
/// ranks the rules applicable to the node's condition by the share of weight they cover.
/// Sets dWeight, aBestRules (at most iMaxRulesPerNode entries when that setting is
/// positive) and lrBestRule.
/// </summary>
/// <remarks>
/// When no rule is applicable, the condition is lengthened by one character (up to
/// iSimilarity) and the scan is repeated; if it cannot be lengthened, the parent's best
/// rule (or the default rule for a root node) is used with weight 0.
/// A LINQ-based variant (GroupBy/OrderBy) was tried earlier and measured slower than this loop.
/// </remarks>
private void FindBestRules()
{
    var dictApplicableRules = new Dictionary<LemmaRule, double>();
    do
    {
        // Restart the sum on every pass so a retry with a longer condition
        // does not count the example weights again.
        dWeight = 0;
        for (var iExm = iStart; iExm <= iEnd; iExm++)
        {
            var lr = elExamples[iExm].Rule;
            var dExmWeight = elExamples[iExm].Weight;
            dWeight += dExmWeight;
            if (!lr.IsApplicableToGroup(sCondition.Length))
                continue;

            // Single lookup: a missing key leaves dSoFar at 0.
            double dSoFar;
            dictApplicableRules.TryGetValue(lr, out dSoFar);
            dictApplicableRules[lr] = dSoFar + dExmWeight;
        }

        // If none was found, increase the condition length or add a fallback rule.
        if (dictApplicableRules.Count == 0)
        {
            if (sCondition.Length < iSimilarity)
            {
                var sFirstWord = elExamples[iStart].Word;
                sCondition = sFirstWord.Substring(sFirstWord.Length - (sCondition.Length + 1));
            }
            else
            {
                // TODO check this heuristic; it may be better to always add the default rule
                // instead of the parent's rule.
                // The root has no parent, so it falls back to the default rule.
                var lrFallback = ltnParentNode != null
                    ? ltnParentNode.lrBestRule
                    : elExamples.Rules.DefaultRule;
                dictApplicableRules.Add(lrFallback, 0);
            }
        }
    } while (dictApplicableRules.Count == 0);

    //TODO can optimize this step using sorted list (dont add if it's worse than the worst)
    var lSortedRules = new List<RuleWeighted>(dictApplicableRules.Count);
    foreach (var kvp in dictApplicableRules)
    {
        // coverage = share of the node weight handled by the rule
        lSortedRules.Add(new RuleWeighted(kvp.Key, kvp.Value / dWeight));
    }
    lSortedRules.Sort();

    // keep just the best iMaxRulesPerNode rules (a non-positive setting keeps all)
    var iNumRules = lSortedRules.Count;
    if (lsett.iMaxRulesPerNode > 0)
        iNumRules = Math.Min(lSortedRules.Count, lsett.iMaxRulesPerNode);
    aBestRules = new RuleWeighted[iNumRules];
    for (var iRule = 0; iRule < iNumRules; iRule++)
    {
        aBestRules[iRule] = lSortedRules[iRule];
    }

    // set best rule
    lrBestRule = aBestRules[0].Rule;

    //TODO must check if this heuristic is OK (to privilege the parent rule)
    // Among the rules tied with the top weight, prefer the parent's best rule.
    if (ltnParentNode != null)
    {
        for (int iRule = 0; iRule < lSortedRules.Count &&
            lSortedRules[iRule].Weight == lSortedRules[0].Weight; iRule++)
        {
            if (lSortedRules[iRule].Rule == ltnParentNode.lrBestRule)
            {
                lrBestRule = lSortedRules[iRule].Rule;
                break;
            }
        }
    }
}
/// <summary>
/// Splits the examples iStart..iEnd into consecutive groups by the character that
/// precedes the shared suffix of length iSimilarity, and creates a sub-node (via AddSub)
/// for every group containing at least one example whose rule differs from lrBestRule.
/// </summary>
private void AddSubAll()
{
int iStartGroup = iStart;
var chCharPrev = '\0';
var bSubGroupNeeded = false;
for (var iWrd = iStart; iWrd <= iEnd; iWrd++)
{
var sWord = elExamples[iWrd].Word;
// grouping character; '\0' when the word is not longer than the shared suffix
var chCharThis = sWord.Length > iSimilarity ? sWord[sWord.Length - 1 - iSimilarity] : '\0';
// character changed: the previous group ended at iWrd - 1
if (iWrd != iStart && chCharPrev != chCharThis)
{
if (bSubGroupNeeded)
{
AddSub(iStartGroup, iWrd - 1, chCharPrev);
bSubGroupNeeded = false;
}
iStartGroup = iWrd;
}
//TODO check out bSubGroupNeeded when there are multiple posible rules (not just lrBestRule)
if (elExamples[iWrd].Rule != lrBestRule)
{
bSubGroupNeeded = true;
}
chCharPrev = chCharThis;
}
// last group; skipped when the whole range formed a single group (iStartGroup == iStart)
if (bSubGroupNeeded && iStartGroup != iStart)
{
AddSub(iStartGroup, iEnd, chCharPrev);
}
}
/// <summary>
/// Builds a sub-node for examples <paramref name="iStart"/>..<paramref name="iEnd"/> and
/// registers it under <paramref name="chChar"/>. A sub-node that has no children and the
/// same best rule as this node is dropped.
/// </summary>
private void AddSub(int iStart, int iEnd, char chChar)
{
    var ltnChild = new LemmaTreeNode(lsett, elExamples, iStart, iEnd, this);

    //TODO - maybe not really appropriate because statistics from multiple possible rules are lost
    bool bSameRule = ltnChild.lrBestRule == lrBestRule;
    if (bSameRule && ltnChild.dictSubNodes == null)
    {
        return;
    }

    // the child dictionary is created on first use
    if (dictSubNodes == null)
    {
        dictSubNodes = new Dictionary<char, LemmaTreeNode>();
    }
    dictSubNodes.Add(chChar, ltnChild);
}
#endregion
#region Essential Class Functions (running model = lemmatizing)
/// <summary>
/// Checks whether <paramref name="sWord"/> matches this node's suffix condition.
/// Returns false when the word is shorter than the condition, or longer than it for a
/// whole-word node. Otherwise only the leading part of the condition is compared against
/// the word, aligned at the word's end.
/// </summary>
/// <remarks>
/// Dereferences ltnParentNode, so it must not be called on a node without a parent.
/// </remarks>
public bool ConditionSatisfied(string sWord)
{
//if (bWholeWord)
// return sWord == sCondition;
//else
// return sWord.EndsWith(sCondition);
// offset of the condition inside the word when both are aligned at their ends
var iDiff = sWord.Length - sCondition.Length;
if (iDiff < 0 || (bWholeWord && iDiff > 0))
return false;
// number of leading condition characters to compare
// NOTE(review): presumably the remaining characters (the parent's condition plus the
// branching character) were already matched on the way down the tree - confirm
var iWrdEnd = sCondition.Length - ltnParentNode.sCondition.Length - 1;
for (var iChar = 0; iChar < iWrdEnd; iChar++)
{
if (sCondition[iChar] != sWord[iChar + iDiff])
return false;
}
return true;
}
/// <summary>
/// Lemmatizes <paramref name="sWord"/>: descends into the sub-node selected by the
/// character preceding the shared suffix when that sub-node's condition is satisfied,
/// otherwise applies this node's best rule.
/// </summary>
public string Lemmatize(string sWord)
{
    if (dictSubNodes != null && sWord.Length >= iSimilarity)
    {
        // '\0' selects the branch for words that are exactly as long as the shared suffix
        char chBranch = '\0';
        if (sWord.Length > iSimilarity)
            chBranch = sWord[sWord.Length - 1 - iSimilarity];

        LemmaTreeNode ltnChild;
        if (dictSubNodes.TryGetValue(chBranch, out ltnChild) && ltnChild.ConditionSatisfied(sWord))
            return ltnChild.Lemmatize(sWord);
    }
    return lrBestRule.Lemmatize(sWord);
}
#endregion
#region Output Functions (ToString)
/// <summary>
/// Returns a multi-line dump of the subtree rooted at this node, one node per line,
/// indented with one tab per tree level.
/// </summary>
public override string ToString()
{
    var sbDump = new StringBuilder();
    ToString(sbDump, 0);
    return sbDump.ToString();
}

/// <summary>
/// Appends the description of this node (suffix, best rule, weight, cover, rules) at the
/// given indentation level, followed by the descriptions of all sub-nodes one level deeper.
/// </summary>
private void ToString(StringBuilder sb, int iLevel)
{
    bool bAnyRules = aBestRules != null;
    string sWholeWordMark = bWholeWord ? "^" : string.Empty;

    sb.Append(new string('\t', iLevel));
    sb.AppendFormat("Suffix=\"{0}{1}\"; ", sWholeWordMark, sCondition);
    sb.AppendFormat("Rule=\"{0}\"; ", lrBestRule);
    sb.AppendFormat("Weight=\"{0}\"; ", dWeight);
    if (bAnyRules && aBestRules.Length > 0)
    {
        sb.AppendFormat("Cover={0}; ", aBestRules[0].Weight);
    }

    sb.Append("Rulles=");
    if (bAnyRules)
    {
        for (int iRule = 0; iRule < aBestRules.Length; iRule++)
        {
            sb.AppendFormat(" {0}", aBestRules[iRule]);
        }
    }
    sb.Append("; ").AppendLine();

    if (dictSubNodes == null)
        return;
    foreach (var kvpChild in dictSubNodes)
    {
        kvpChild.Value.ToString(sb, iLevel + 1);
    }
}
#endregion
#region Serialization Functions (Binary)
/// <summary>
/// Writes this node and, recursively, its sub-nodes to a binary stream.
/// Layout: "has children" flag, child count and (key, child) pairs when present, then
/// similarity, condition, whole-word flag, best rule signature, the weighted best rules
/// (signature + weight each), node weight and the example range.
/// </summary>
public void Serialize(BinaryWriter binWrt)
{
    bool bHasChildren = dictSubNodes != null;
    binWrt.Write(bHasChildren);
    if (bHasChildren)
    {
        binWrt.Write(dictSubNodes.Count);
        foreach (var kvpChild in dictSubNodes)
        {
            binWrt.Write(kvpChild.Key);
            kvpChild.Value.Serialize(binWrt);
        }
    }

    binWrt.Write(iSimilarity);
    binWrt.Write(sCondition);
    binWrt.Write(bWholeWord);

    // rules are stored by signature and resolved against the rule list when loading
    binWrt.Write(lrBestRule.Signature);
    binWrt.Write(aBestRules.Length);
    foreach (var rwBest in aBestRules)
    {
        binWrt.Write(rwBest.Rule.Signature);
        binWrt.Write(rwBest.Weight);
    }

    binWrt.Write(dWeight);
    binWrt.Write(iStart);
    binWrt.Write(iEnd);
}
/// <summary>
/// Reads the state written by Serialize, rebuilding the sub-nodes recursively.
/// Rules are resolved by signature through <c>elExamples.Rules</c>.
/// </summary>
/// <param name="binRead">Source reader.</param>
/// <param name="lsett">Settings for this node and its sub-nodes.</param>
/// <param name="elExamples">Example list providing the rule list.</param>
/// <param name="ltnParentNode">Parent of this node, or null for the root.</param>
public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
{
    this.lsett = lsett;

    // children come first in the stream
    bool bHasChildren = binRead.ReadBoolean();
    dictSubNodes = bHasChildren ? new Dictionary<char, LemmaTreeNode>() : null;
    if (bHasChildren)
    {
        int iChildren = binRead.ReadInt32();
        for (int iChild = 0; iChild < iChildren; iChild++)
        {
            char chKey = binRead.ReadChar();
            dictSubNodes.Add(chKey, new LemmaTreeNode(binRead, this.lsett, elExamples, this));
        }
    }

    this.ltnParentNode = ltnParentNode;
    iSimilarity = binRead.ReadInt32();
    sCondition = binRead.ReadString();
    bWholeWord = binRead.ReadBoolean();
    lrBestRule = elExamples.Rules[binRead.ReadString()];

    int iBestCount = binRead.ReadInt32();
    aBestRules = new RuleWeighted[iBestCount];
    for (int iBest = 0; iBest < iBestCount; iBest++)
    {
        // signature first, then the weight - same order as written
        var lrRule = elExamples.Rules[binRead.ReadString()];
        double dRuleWeight = binRead.ReadDouble();
        aBestRules[iBest] = new RuleWeighted(lrRule, dRuleWeight);
    }

    dWeight = binRead.ReadDouble();
    iStart = binRead.ReadInt32();
    iEnd = binRead.ReadInt32();
    this.elExamples = elExamples;
}
/// <summary>
/// Creates a node (and its subtree) by reading it from a binary stream (see Deserialize).
/// </summary>
public LemmaTreeNode(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
{
    this.Deserialize(binRead, lsett, elExamples, ltnParentNode);
}
#endregion
#region Serialization Functions (Latino)
#if LATINO
// Latino counterpart of Serialize: same field order, written through Latino.BinarySerializer.
// Only compiled when the LATINO symbol is defined.
public void Save(Latino.BinarySerializer binWrt) {
// children first: presence flag, count, then (key, child) pairs
binWrt.WriteBool(dictSubNodes != null);
if (dictSubNodes != null) {
binWrt.WriteInt(dictSubNodes.Count);
foreach (KeyValuePair<char, LemmaTreeNode> kvp in dictSubNodes) {
binWrt.WriteChar(kvp.Key);
kvp.Value.Save(binWrt);
}
}
binWrt.WriteInt(iSimilarity);
binWrt.WriteString(sCondition);
binWrt.WriteBool(bWholeWord);
// rules are stored by signature
binWrt.WriteString(lrBestRule.Signature);
binWrt.WriteInt(aBestRules.Length);
for (int i = 0; i < aBestRules.Length; i++) {
binWrt.WriteString(aBestRules[i].Rule.Signature);
binWrt.WriteDouble(aBestRules[i].Weight);
}
binWrt.WriteDouble(dWeight);
binWrt.WriteInt(iStart);
binWrt.WriteInt(iEnd);
}
// Latino counterpart of Deserialize: reads what Save wrote, in the same order.
public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode) {
this.lsett = lsett;
if (binRead.ReadBool()) {
dictSubNodes = new Dictionary<char, LemmaTreeNode>();
int iCount = binRead.ReadInt();
for (int i = 0; i < iCount; i++) {
char cKey = binRead.ReadChar();
// sub-nodes are loaded recursively with this node as their parent
LemmaTreeNode ltrSub = new LemmaTreeNode(binRead, this.lsett, elExamples, this);
dictSubNodes.Add(cKey, ltrSub);
}
}
else
dictSubNodes = null;
this.ltnParentNode = ltnParentNode;
iSimilarity = binRead.ReadInt();
sCondition = binRead.ReadString();
bWholeWord = binRead.ReadBool();
// rules are resolved by signature through the example list's rule list
lrBestRule = elExamples.Rules[binRead.ReadString()];
int iCountBest = binRead.ReadInt();
aBestRules = new RuleWeighted[iCountBest];
for (int i = 0; i < iCountBest; i++)
aBestRules[i] = new RuleWeighted(elExamples.Rules[binRead.ReadString()], binRead.ReadDouble());
dWeight = binRead.ReadDouble();
iStart = binRead.ReadInt();
iEnd = binRead.ReadInt();
this.elExamples = elExamples;
}
// Creates a node (and its subtree) by loading it from a Latino stream (see Load).
public LemmaTreeNode(Latino.BinarySerializer binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode) {
Load(binRead, lsett, elExamples, ltnParentNode);
}
#endif
#endregion
#region Other (Temporarly)
//TODO - this is a temporary function, remove it
/// <summary>
/// Verifies recursively that the condition of every sub-node ends with the condition
/// of its parent node.
/// </summary>
/// <returns>True when the whole subtree is consistent (also when there are no sub-nodes).</returns>
public bool CheckConsistency()
{
    if (dictSubNodes == null)
        return true;

    foreach (var ltnChild in dictSubNodes.Values)
    {
        // stop at the first inconsistent child
        if (!ltnChild.CheckConsistency())
            return false;
        if (!ltnChild.sCondition.EndsWith(sCondition))
            return false;
    }
    return true;
}
#endregion
}
}

@ -1,465 +0,0 @@
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Runtime.Serialization;
using System.IO.Compression;
using SevenZip;
namespace LemmaSharp
{
[Serializable]
public class Lemmatizer : ITrainableLemmatizer
#if LATINO
, Latino.ISerializable
#endif
{
#region Private Variables
protected LemmatizerSettings lsett;
protected ExampleList elExamples;
protected LemmaTreeNode ltnRootNode;
protected LemmaTreeNode ltnRootNodeFront;
#endregion
#region Constructor(s)
/// <summary>
/// Creates an empty lemmatizer with freshly constructed LemmatizerSettings.
/// </summary>
public Lemmatizer()
    : this(new LemmatizerSettings())
{
}

/// <summary>
/// Creates an empty lemmatizer with the given settings; no model is built yet.
/// </summary>
public Lemmatizer(LemmatizerSettings lsett)
{
    this.lsett = lsett;
    elExamples = new ExampleList(lsett);
    ltnRootNode = null;
    ltnRootNodeFront = null;
}

/// <summary>
/// Creates a lemmatizer and loads its examples from a Multext file
/// read in the given format.
/// </summary>
public Lemmatizer(StreamReader srIn, string sFormat, LemmatizerSettings lsett)
    : this(lsett)
{
    this.AddMultextFile(srIn, sFormat);
}
#endregion
#region Private Properties
/// <summary>
/// Root node of the (rear) model; the model is built on demand when it does not exist yet.
/// </summary>
private LemmaTreeNode ltrRootNodeSafe
{
    get
    {
        bool bMissing = ltnRootNode == null;
        if (bMissing)
        {
            BuildModel();
        }
        return ltnRootNode;
    }
}

/// <summary>
/// Root node of the front model; built on demand, but only when the settings ask for
/// a front lemmatizer.
/// </summary>
private LemmaTreeNode ltrRootNodeFrontSafe
{
    get
    {
        bool bMissing = ltnRootNodeFront == null;
        if (bMissing && lsett.bBuildFrontLemmatizer)
        {
            BuildModel();
        }
        return ltnRootNodeFront;
    }
}
#endregion
#region Public Properties
/// <summary>Deep clone of the settings, as produced by <c>CloneDeep</c>.</summary>
public LemmatizerSettings Settings => lsett.CloneDeep();

/// <summary>Example list used by this lemmatizer.</summary>
public ExampleList Examples => elExamples;

/// <summary>Rule list of the example list.</summary>
public RuleList Rules => elExamples.Rules;

/// <summary>Root node of the model; the model is built on demand.</summary>
public LemmaTreeNode RootNode => ltrRootNodeSafe;

/// <summary>Root node of the front model; built on demand when enabled in the settings.</summary>
public LemmaTreeNode RootNodeFront => ltrRootNodeFrontSafe;

/// <summary>The model (root node) exposed through ILemmatizerModel; built on demand.</summary>
public ILemmatizerModel Model => ltrRootNodeSafe;
#endregion
#region Essential Class Functions (adding examples to repository)
public void AddMultextFile(StreamReader srIn, string sFormat)
{
this.elExamples.AddMultextFile(srIn, sFormat);
ltnRootNode = null;
}
public void AddExample(string sWord, string sLemma)
{
AddExample(sWord, sLemma, 1, null);
}
public void AddExample(string sWord, string sLemma, double dWeight)
{
AddExample(sWord, sLemma, dWeight, null);
}
public void AddExample(string sWord, string sLemma, double dWeight, string sMsd)
{
elExamples.AddExample(sWord, sLemma, dWeight, sMsd);
ltnRootNode = null;
}
public void DropExamples()
{
elExamples.DropExamples();
}
public void FinalizeAdditions()
{
elExamples.FinalizeAdditions();
}
#endregion
#region Essential Class Functions (building model & lemmatizing)
public void BuildModel()
{
if (ltnRootNode != null)
return;
if (!lsett.bBuildFrontLemmatizer)
{
//TODO remove: elExamples.FinalizeAdditions();
elExamples.FinalizeAdditions();
ltnRootNode = new LemmaTreeNode(lsett, elExamples);
}
else
{
ltnRootNode = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(false));
ltnRootNodeFront = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(true));
}
}
public string Lemmatize(string sWord)
{
if (!lsett.bBuildFrontLemmatizer)
{
return ltrRootNodeSafe.Lemmatize(sWord);
}
var sWordFront = LemmaExample.StringReverse(sWord);
var sLemmaFront = ltrRootNodeFrontSafe.Lemmatize(sWordFront);
var sWordRear = LemmaExample.StringReverse(sLemmaFront);
return ltrRootNodeSafe.Lemmatize(sWordRear);
}
#endregion
#region Serialization Functions (ISerializable)
public void GetObjectData(SerializationInfo info, StreamingContext context)
{
info.AddValue("lsett", lsett);
info.AddValue("elExamples", elExamples);
}
public Lemmatizer(SerializationInfo info, StreamingContext context) : this()
{
lsett = (LemmatizerSettings)info.GetValue("lsett", typeof(LemmatizerSettings));
elExamples = (ExampleList)info.GetValue("elExamples", typeof(ExampleList));
this.BuildModel();
}
#endregion
#region Serialization Functions (Binary)
public void Serialize(BinaryWriter binWrt, bool bSerializeExamples)
{
lsett.Serialize(binWrt);
binWrt.Write(bSerializeExamples);
elExamples.Serialize(binWrt, bSerializeExamples, false);
if (!bSerializeExamples)
{
elExamples.GetFrontRearExampleList(false).Serialize(binWrt, bSerializeExamples, false);
elExamples.GetFrontRearExampleList(true).Serialize(binWrt, bSerializeExamples, false);
}
ltnRootNode.Serialize(binWrt);
if (lsett.bBuildFrontLemmatizer)
ltnRootNodeFront.Serialize(binWrt);
}
public void Deserialize(BinaryReader binRead)
{
lsett = new LemmatizerSettings(binRead);
var bSerializeExamples = binRead.ReadBoolean();
elExamples = new ExampleList(binRead, lsett);
ExampleList elExamplesRear;
ExampleList elExamplesFront;
if (bSerializeExamples)
{
elExamplesRear = elExamples.GetFrontRearExampleList(false);
elExamplesFront = elExamples.GetFrontRearExampleList(true);
}
else
{
elExamplesRear = new ExampleList(binRead, lsett);
elExamplesFront = new ExampleList(binRead, lsett);
}
if (!lsett.bBuildFrontLemmatizer)
{
ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null);
}
else
{
ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamplesRear, null);
ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, elExamplesFront, null);
}
}
//Do not change the order!!! (If new compression algorithms are added, otherwise you will not be able to load old files.)
public enum Compression
{
None,
Deflate,
LZMA
}
public Lemmatizer(BinaryReader binRead)
{
var compr = (Compression)binRead.ReadByte();
if (compr == Compression.None)
Deserialize(binRead);
else
throw new Exception("Loading lemmatizer with binary reader on uncompressed stream is not supported.");
}
public Lemmatizer(Stream streamIn)
{
Deserialize(streamIn);
}
public void Serialize(Stream streamOut)
{
Serialize(streamOut, true, Compression.None);
}
public void Serialize(Stream streamOut, bool bSerializeExamples)
{
Serialize(streamOut, bSerializeExamples, Compression.None);
}
public void Serialize(Stream streamOut, bool bSerializeExamples, Compression compress)
{
streamOut.WriteByte((byte)compress);
switch (compress)
{
case Compression.None:
SerializeNone(streamOut, bSerializeExamples);
break;
case Compression.Deflate:
SerializeDeflate(streamOut, bSerializeExamples);
break;
case Compression.LZMA:
SerializeLZMA(streamOut, bSerializeExamples);
break;
default:
break;
}
}
private void SerializeNone(Stream streamOut, bool bSerializeExamples)
{
using (var binWrt = new BinaryWriter(streamOut))
{
this.Serialize(binWrt, bSerializeExamples);
}
}
private void SerializeDeflate(Stream streamOut, bool bSerializeExamples)
{
using (var streamOutNew = new DeflateStream(streamOut, CompressionMode.Compress, true))
{
using (var binWrt = new BinaryWriter(streamOutNew))
{
this.Serialize(binWrt, bSerializeExamples);
binWrt.Flush();
binWrt.Close();
}
}
}
private void SerializeLZMA(Stream streamOut, bool bSerializeExamples)
{
CoderPropID[] propIDs =
{
CoderPropID.DictionarySize,
CoderPropID.PosStateBits,
CoderPropID.LitContextBits,
CoderPropID.LitPosBits,
CoderPropID.Algorithm,
CoderPropID.NumFastBytes,
CoderPropID.MatchFinder,
CoderPropID.EndMarker
};
Int32 dictionary = 1 << 23;
Int32 posStateBits = 2;
Int32 litContextBits = 3; // for normal files
Int32 litPosBits = 0;
Int32 algorithm = 2;
Int32 numFastBytes = 128;
var mf = "bt4";
var eos = false;
object[] properties =
{
(Int32)(dictionary),
(Int32)(posStateBits),
(Int32)(litContextBits),
(Int32)(litPosBits),
(Int32)(algorithm),
(Int32)(numFastBytes),
mf,
eos
};
using (var msTemp = new MemoryStream())
{
using (var binWrtTemp = new BinaryWriter(msTemp))
{
this.Serialize(binWrtTemp, bSerializeExamples);
msTemp.Position = 0;
var encoder = new SevenZip.Compression.LZMA.Encoder();
encoder.SetCoderProperties(propIDs, properties);
encoder.WriteCoderProperties(streamOut);
var fileSize = msTemp.Length;
for (int i = 0; i < 8; i++)
{
streamOut.WriteByte((Byte)(fileSize >> (8 * i)));
}
encoder.Code(msTemp, streamOut, -1, -1, null);
binWrtTemp.Close();
encoder = null;
}
msTemp.Close();
}
}
public void Deserialize(Stream streamIn)
{
var compr = (Compression)streamIn.ReadByte();
using (var streamInNew = Decompress(streamIn, compr))
{
using (var br = new BinaryReader(streamInNew))
{
Deserialize(br);
}
}
}
private Stream Decompress(Stream streamIn, Compression compress)
{
Stream streamInNew;
switch (compress)
{
case Compression.None:
default:
streamInNew = streamIn;
break;
case Compression.Deflate:
streamInNew = new DeflateStream(streamIn, CompressionMode.Decompress);
break;
case Compression.LZMA:
streamInNew = DecompressLZMA(streamIn);
break;
}
return streamInNew;
}
private Stream DecompressLZMA(Stream streamIn)
{
var properties = new byte[5];
if (streamIn.Read(properties, 0, 5) != 5)
throw new Exception("input .lzma is too short");
var decoder = new SevenZip.Compression.LZMA.Decoder();
decoder.SetDecoderProperties(properties);
long outSize = 0;
for (var i = 0; i < 8; i++)
{
var v = streamIn.ReadByte();
if (v < 0)
throw (new Exception("Can't Read 1"));
outSize |= ((long)(byte)v) << (8 * i);
}
var compressedSize = streamIn.Length - streamIn.Position;
var outStream = new MemoryStream();
decoder.Code(streamIn, outStream, compressedSize, outSize, null);
outStream.Seek(0, 0);
decoder = null;
return outStream;
}
#endregion
#region Serialization Functions (Latino)
#if LATINO
public void Save(Latino.BinarySerializer binWrt) {
lsett.Save(binWrt);
elExamples.Save(binWrt, true, false);
ltnRootNode.Save(binWrt);
if (lsett.bBuildFrontLemmatizer)
ltnRootNodeFront.Save(binWrt);
}
public void Load(Latino.BinarySerializer binRead) {
lsett = new LemmatizerSettings(binRead);
elExamples = new ExampleList(binRead, lsett);
if (!lsett.bBuildFrontLemmatizer) {
ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null);
}
else {
ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples.GetFrontRearExampleList(false) , null);
ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, elExamples.GetFrontRearExampleList(true), null);
}
}
public Lemmatizer(Latino.BinarySerializer binRead) {
Load(binRead);
}
public void Save(Stream streamOut) {
Latino.BinarySerializer binWrt = new Latino.BinarySerializer(streamOut);
this.Save(binWrt);
binWrt.Close();
}
public void Load(Stream streamIn) {
Latino.BinarySerializer binRead = new Latino.BinarySerializer(streamIn);
Load(binRead);
binRead.Close();
}
public Lemmatizer(Stream streamIn, string sDummy) {
Load(streamIn);
}
#endif
#endregion
}
}

@ -1,143 +0,0 @@
using System;
using System.IO;
using System.Runtime.Serialization;
namespace LemmaSharp
{
/// <summary>
/// Settings of the LemmaGen algorithm that affect the speed/power of the learning and lemmatizing algorithm.
/// TODO: this class will probably be removed in the future.
/// </summary>
[Serializable]
public class LemmatizerSettings : ISerializable
{
    #region Sub-Structures
    /// <summary>
    /// How the algorithm considers msd tags.
    /// </summary>
    public enum MsdConsideration
    {
        /// <summary>
        /// Completely ignores msd tags (joins examples with different tags and sums their weights).
        /// </summary>
        Ignore,
        /// <summary>
        /// Same examples with different msd's are not considered equal and are not joined.
        /// </summary>
        Distinct,
        /// <summary>
        /// Joins examples with different tags (concatenates all msd tags).
        /// </summary>
        JoinAll,
        /// <summary>
        /// Joins examples with different tags (concatenates just distinct msd tags - somewhat slower).
        /// </summary>
        JoinDistinct,
        /// <summary>
        /// Joins examples with different tags (the new tag is the left-to-right substring that all joined examples share).
        /// </summary>
        JoinSameSubstring
    }
    #endregion

    #region Public Variables
    /// <summary>
    /// True if the from-string should be included in the rule identifier ([from]->[to]). False if just the length of the from-string is used ([#len]->[to]).
    /// </summary>
    public bool bUseFromInRules = true;
    /// <summary>
    /// Specifies how the algorithm considers msd tags.
    /// </summary>
    public MsdConsideration eMsdConsider = MsdConsideration.Distinct;
    /// <summary>
    /// How many of the best rules are kept in memory for each node. Zero means unlimited.
    /// </summary>
    public int iMaxRulesPerNode = 0;
    /// <summary>
    /// If true, the build process uses a few more heuristics to first build a left-to-right lemmatizer (which lemmatizes the front of the word).
    /// </summary>
    public bool bBuildFrontLemmatizer = false;
    #endregion

    #region Constructor(s)
    /// <summary>Creates settings with the default field values.</summary>
    public LemmatizerSettings()
    {
    }

    /// <summary>Creates settings from the values stored by <see cref="GetObjectData"/>.</summary>
    public LemmatizerSettings(SerializationInfo info, StreamingContext context)
    {
        bUseFromInRules = info.GetBoolean("bUseFromInRules");
        eMsdConsider = (MsdConsideration)info.GetValue("eMsdConsider", typeof(MsdConsideration));
        iMaxRulesPerNode = info.GetInt32("iMaxRulesPerNode");
        bBuildFrontLemmatizer = info.GetBoolean("bBuildFrontLemmatizer");
    }

    /// <summary>Creates settings by reading them from <paramref name="binRead"/> (see <see cref="Deserialize"/>).</summary>
    public LemmatizerSettings(BinaryReader binRead)
    {
        Deserialize(binRead);
    }
    #endregion

    #region Cloneable functions
    /// <summary>Returns a new, independent instance carrying the same four setting values.</summary>
    public LemmatizerSettings CloneDeep()
    {
        var lsettCopy = new LemmatizerSettings();
        lsettCopy.bUseFromInRules = bUseFromInRules;
        lsettCopy.eMsdConsider = eMsdConsider;
        lsettCopy.iMaxRulesPerNode = iMaxRulesPerNode;
        lsettCopy.bBuildFrontLemmatizer = bBuildFrontLemmatizer;
        return lsettCopy;
    }
    #endregion

    #region Serialization Functions (ISerializable)
    /// <summary>Stores every setting under its field name.</summary>
    public void GetObjectData(SerializationInfo info, StreamingContext context)
    {
        info.AddValue("bUseFromInRules", bUseFromInRules);
        info.AddValue("eMsdConsider", eMsdConsider);
        info.AddValue("iMaxRulesPerNode", iMaxRulesPerNode);
        info.AddValue("bBuildFrontLemmatizer", bBuildFrontLemmatizer);
    }
    #endregion

    #region Serialization Functions (Binary)
    /// <summary>
    /// Writes the settings as: bool, int (enum value), int, bool.
    /// The order must match <see cref="Deserialize"/>.
    /// </summary>
    public void Serialize(BinaryWriter binWrt)
    {
        binWrt.Write(bUseFromInRules);
        var iMsdConsider = (int)eMsdConsider;
        binWrt.Write(iMsdConsider);
        binWrt.Write(iMaxRulesPerNode);
        binWrt.Write(bBuildFrontLemmatizer);
    }

    /// <summary>Reads the settings in the order written by <see cref="Serialize"/>.</summary>
    public void Deserialize(BinaryReader binRead)
    {
        bUseFromInRules = binRead.ReadBoolean();
        var iMsdConsider = binRead.ReadInt32();
        eMsdConsider = (MsdConsideration)iMsdConsider;
        iMaxRulesPerNode = binRead.ReadInt32();
        bBuildFrontLemmatizer = binRead.ReadBoolean();
    }
    #endregion

    #region Serialization Functions (Latino)
#if LATINO
    /// <summary>Writes the settings with the Latino serializer (bool, int, int, bool).</summary>
    public void Save(Latino.BinarySerializer binWrt) {
        binWrt.WriteBool(bUseFromInRules);
        binWrt.WriteInt((int)eMsdConsider);
        binWrt.WriteInt(iMaxRulesPerNode);
        binWrt.WriteBool(bBuildFrontLemmatizer);
    }

    /// <summary>Reads the settings in the order written by <c>Save</c>.</summary>
    public void Load(Latino.BinarySerializer binRead) {
        bUseFromInRules = binRead.ReadBool();
        eMsdConsider = (MsdConsideration)binRead.ReadInt();
        iMaxRulesPerNode = binRead.ReadInt();
        bBuildFrontLemmatizer = binRead.ReadBool();
    }

    /// <summary>Loading constructor for the Latino serializer.</summary>
    public LemmatizerSettings(Latino.BinarySerializer reader) {
        Load(reader);
    }
#endif
    #endregion
}
}

@ -1,161 +0,0 @@
using System.Collections.Generic;
using System.IO;
namespace LemmaSharp
{
/// <summary>
/// Collection of lemmatization rules keyed by rule signature. A default rule
/// (created from empty strings) is always registered first.
/// </summary>
public class RuleList : Dictionary<string, LemmaRule>
{
    #region Private Variables
    private LemmatizerSettings lsett;
    private LemmaRule lrDefaultRule;
    #endregion

    #region Constructor(s)
    /// <summary>Creates a list that contains only the default rule.</summary>
    public RuleList(LemmatizerSettings lsett)
    {
        this.lsett = lsett;
        var lrDefault = new LemmaRule("", "", 0, lsett);
        lrDefaultRule = AddRule(lrDefault);
    }

    /// <summary>Creates a list by reading it from <paramref name="binRead"/> (see <see cref="Deserialize"/>).</summary>
    public RuleList(BinaryReader binRead, LemmatizerSettings lsett)
    {
        Deserialize(binRead, lsett);
    }
    #endregion

    #region Public Properties
    /// <summary>The rule registered by the constructor, or linked by the loader.</summary>
    public LemmaRule DefaultRule
    {
        get { return lrDefaultRule; }
    }
    #endregion

    #region Essential Class Functions
    /// <summary>
    /// Returns the rule for the given example, creating and registering it when no rule
    /// with the same signature exists yet. A new rule receives the current count as its id.
    /// </summary>
    public LemmaRule AddRule(LemmaExample le)
    {
        var lrCandidate = new LemmaRule(le.Word, le.Lemma, Count, lsett);
        return AddRule(lrCandidate);
    }

    /// <summary>Returns the already stored rule with the same signature, or stores and returns <paramref name="lrRuleNew"/>.</summary>
    private LemmaRule AddRule(LemmaRule lrRuleNew)
    {
        LemmaRule lrExisting;
        if (TryGetValue(lrRuleNew.Signature, out lrExisting))
            return lrExisting;

        Add(lrRuleNew.Signature, lrRuleNew);
        return lrRuleNew;
    }
    #endregion

    #region Serialization Functions (Binary)
    /// <summary>
    /// Writes the list: top-object flag, settings (only for a top object), item count,
    /// each key followed by its rule, and finally the signature of the default rule.
    /// </summary>
    public void Serialize(BinaryWriter binWrt, bool bThisTopObject)
    {
        // metadata
        binWrt.Write(bThisTopObject);

        // referenced objects, only when this list is serialized on its own
        if (bThisTopObject)
            lsett.Serialize(binWrt);

        // list items
        binWrt.Write(Count);
        foreach (var kvpRule in this)
        {
            binWrt.Write(kvpRule.Key);
            kvpRule.Value.Serialize(binWrt, false);
        }

        // the default rule is already part of the list, so only its key is stored
        binWrt.Write(lrDefaultRule.Signature);
    }

    /// <summary>
    /// Replaces the content of this list with the data written by <see cref="Serialize"/>.
    /// </summary>
    /// <param name="binRead">Source reader.</param>
    /// <param name="lsett">Settings to use when the stream does not carry its own.</param>
    public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett)
    {
        // metadata
        var bThisTopObject = binRead.ReadBoolean();

        // referenced objects: read from the stream for a top object, otherwise take the caller's
        if (bThisTopObject)
        {
            this.lsett = new LemmatizerSettings(binRead);
        }
        else
        {
            this.lsett = lsett;
        }

        // list items
        Clear();
        var iCount = binRead.ReadInt32();
        for (var iRemaining = iCount; iRemaining > 0; iRemaining--)
        {
            var sKey = binRead.ReadString();
            var lrRule = new LemmaRule(binRead, this.lsett);
            Add(sKey, lrRule);
        }

        // link the default rule through its stored key
        var sDefaultKey = binRead.ReadString();
        lrDefaultRule = this[sDefaultKey];
    }
    #endregion

    #region Serialization Functions (Latino)
#if LATINO
    /// <summary>Latino counterpart of <c>Serialize</c>; same layout.</summary>
    public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject) {
        // metadata
        binWrt.WriteBool(bThisTopObject);

        // referenced objects, only when this list is serialized on its own
        if (bThisTopObject)
            lsett.Save(binWrt);

        // list items
        binWrt.WriteInt(Count);
        foreach (KeyValuePair<string, LemmaRule> kvpRule in this) {
            binWrt.WriteString(kvpRule.Key);
            kvpRule.Value.Save(binWrt, false);
        }

        // the default rule is already part of the list, so only its key is stored
        binWrt.WriteString(lrDefaultRule.Signature);
    }

    /// <summary>Latino counterpart of <c>Deserialize</c>; same layout.</summary>
    public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
        // metadata
        bool bThisTopObject = binRead.ReadBool();

        // referenced objects: read from the stream for a top object, otherwise take the caller's
        if (bThisTopObject) {
            this.lsett = new LemmatizerSettings(binRead);
        }
        else {
            this.lsett = lsett;
        }

        // list items
        Clear();
        int iCount = binRead.ReadInt();
        for (int iRemaining = iCount; iRemaining > 0; iRemaining--) {
            string sKey = binRead.ReadString();
            LemmaRule lrRule = new LemmaRule(binRead, this.lsett);
            Add(sKey, lrRule);
        }

        // link the default rule through its stored key
        string sDefaultKey = binRead.ReadString();
        lrDefaultRule = this[sDefaultKey];
    }

    /// <summary>Loading constructor for the Latino serializer.</summary>
    public RuleList(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
        Load(binRead, lsett);
    }
#endif
    #endregion
}
}

@ -1,50 +0,0 @@
using System;
namespace LemmaSharp
{
/// <summary>
/// Pairs a rule with a weight and defines the sort order of such pairs.
/// </summary>
[Serializable]
class RuleWeighted : IComparable<RuleWeighted>
{
    #region Private Variables
    private LemmaRule lrRule;
    private double dWeight;
    #endregion

    #region Constructor(s)
    /// <summary>Creates the pair from a rule and its weight.</summary>
    public RuleWeighted(LemmaRule lrRule, double dWeight)
    {
        this.dWeight = dWeight;
        this.lrRule = lrRule;
    }
    #endregion

    #region Public Properties
    /// <summary>The wrapped rule.</summary>
    public LemmaRule Rule => lrRule;

    /// <summary>The weight attached to the rule.</summary>
    public double Weight => dWeight;
    #endregion

    #region Essential Class Functions (comparing objects, eg.: for sorting)
    /// <summary>
    /// Orders by weight, larger weight first; equal weights are ordered by rule id,
    /// larger id first.
    /// </summary>
    /// <returns>-1 when this instance sorts before <paramref name="rl"/>, 1 when after, 0 when neither.</returns>
    public int CompareTo(RuleWeighted rl)
    {
        // Explicit relational tests (rather than double.CompareTo) so that weights which
        // are neither greater nor smaller fall through to the id comparison.
        if (dWeight > rl.dWeight) return -1;
        if (dWeight < rl.dWeight) return 1;

        if (lrRule.Id > rl.lrRule.Id) return -1;
        if (lrRule.Id < rl.lrRule.Id) return 1;

        return 0;
    }
    #endregion

    #region Output & Serialization Functions
    /// <summary>Returns the rule's text immediately followed by the weight as a percentage in parentheses.</summary>
    public override string ToString()
    {
        var sText = string.Format("{0}{1:(0.00%)}", lrRule, dWeight);
        return sText;
    }
    #endregion
}
}

@ -1,9 +0,0 @@
using System.Runtime.Serialization;
namespace LemmaSharp
{
/// <summary>
/// A lemmatizer: maps a word to its lemma. Implementations are also
/// <see cref="ISerializable"/>.
/// </summary>
public interface ILemmatizer : ISerializable
{
/// <summary>Returns the lemma for the given word.</summary>
/// <param name="sWord">The word to lemmatize.</param>
string Lemmatize(string sWord);
}
}

@ -1,8 +0,0 @@
namespace LemmaSharp
{
/// <summary>
/// A built lemmatization model that can lemmatize words and describe itself as text.
/// </summary>
public interface ILemmatizerModel
{
/// <summary>Returns the lemma for the given word.</summary>
/// <param name="sWord">The word to lemmatize.</param>
string Lemmatize(string sWord);
/// <summary>Returns a textual representation of the model.</summary>
string ToString();
}
}

@ -1,12 +0,0 @@
namespace LemmaSharp
{
/// <summary>
/// A lemmatizer that can be trained: examples are added and a model is built from them.
/// </summary>
public interface ITrainableLemmatizer : ILemmatizer
{
/// <summary>The examples collected so far.</summary>
ExampleList Examples { get; }
/// <summary>The model used for lemmatizing.</summary>
ILemmatizerModel Model { get; }
/// <summary>Adds a word/lemma example.</summary>
void AddExample(string sWord, string sLemma);
/// <summary>Adds a word/lemma example with the given weight.</summary>
void AddExample(string sWord, string sLemma, double dWeight);
/// <summary>Adds a word/lemma example with the given weight and msd tag.</summary>
void AddExample(string sWord, string sLemma, double dWeight, string sMsd);
/// <summary>Builds the model from the added examples.</summary>
void BuildModel();
}
}

@ -1,539 +0,0 @@
/*==========================================================================;
*
* (c) 2004-08 JSI. All rights reserved.
*
* File: BinarySerializer.cs
* Version: 1.0
* Desc: Binary serializer
* Author: Miha Grcar
* Created on: Oct-2004
* Last modified: May-2008
* Revision: May-2008
*
***************************************************************************/
//Remark: Use this file as a Latino compatibility checker. When it is included in
// the project it defines the symbol LATINO, which should enable all Latino-specific
// serialization functions. When it is excluded, this code will not be created and
// the following Latino namespace will not be added to the project.
using System;
using System.Runtime.InteropServices;
using System.Collections.Generic;
using System.Reflection;
using System.Text;
using System.IO;
#if LATINO
namespace Latino
{
    /* .-----------------------------------------------------------------------
       |
       |  Interface ISerializable
       |
       '-----------------------------------------------------------------------
    */
    /// <summary>
    /// Implemented by types that can write themselves to a <see cref="BinarySerializer"/>.
    /// </summary>
    public interface ISerializable
    {
        // *** note that you need to implement a constructor that loads the instance if the class implements Latino.ISerializable
        void Save(Latino.BinarySerializer writer);
    }

    /* .-----------------------------------------------------------------------
       |
       |  Class BinarySerializer
       |
       '-----------------------------------------------------------------------
    */
    /// <summary>
    /// Reads and writes primitive values, strings and <see cref="ISerializable"/> objects
    /// from/to a stream. Multi-byte values use the byte order of <see cref="BitConverter"/>.
    /// All read methods throw <see cref="EndOfStreamException"/> when the stream ends
    /// before the requested value is complete.
    /// </summary>
    public class BinarySerializer
    {
        private static Dictionary<string, string> m_full_to_short_type_name
            = new Dictionary<string, string>();
        private static Dictionary<string, string> m_short_to_full_type_name
            = new Dictionary<string, string>();
        private Stream m_stream;
        private string m_data_dir
            = ".";

        private static void RegisterTypeName(string full_type_name, string short_type_name)
        {
            m_full_to_short_type_name.Add(full_type_name, short_type_name);
            m_short_to_full_type_name.Add(short_type_name, full_type_name);
        }

        // Returns the registered full name, or the argument itself when it is not a known alias.
        private static string GetFullTypeName(string short_type_name)
        {
            string full_type_name;
            return m_short_to_full_type_name.TryGetValue(short_type_name, out full_type_name) ? full_type_name : short_type_name;
        }

        // Returns the registered alias, or the argument itself when no alias exists.
        private static string GetShortTypeName(string full_type_name)
        {
            string short_type_name;
            return m_full_to_short_type_name.TryGetValue(full_type_name, out short_type_name) ? short_type_name : full_type_name;
        }

        static BinarySerializer()
        {
            RegisterTypeName(typeof(bool).AssemblyQualifiedName, "b");
            RegisterTypeName(typeof(byte).AssemblyQualifiedName, "ui1");
            RegisterTypeName(typeof(sbyte).AssemblyQualifiedName, "i1");
            RegisterTypeName(typeof(char).AssemblyQualifiedName, "c");
            RegisterTypeName(typeof(double).AssemblyQualifiedName, "f8");
            RegisterTypeName(typeof(float).AssemblyQualifiedName, "f4");
            RegisterTypeName(typeof(int).AssemblyQualifiedName, "i4");
            RegisterTypeName(typeof(uint).AssemblyQualifiedName, "ui4");
            RegisterTypeName(typeof(long).AssemblyQualifiedName, "i8");
            RegisterTypeName(typeof(ulong).AssemblyQualifiedName, "ui8");
            RegisterTypeName(typeof(short).AssemblyQualifiedName, "i2");
            RegisterTypeName(typeof(ushort).AssemblyQualifiedName, "ui2");
            RegisterTypeName(typeof(string).AssemblyQualifiedName, "s");
        }

        /// <summary>Creates a serializer over an existing stream.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public BinarySerializer(Stream stream)
        {
            if (stream == null) { throw new ArgumentNullException("stream"); }
            m_stream = stream;
        }

        /// <summary>Creates a serializer over a new in-memory stream.</summary>
        public BinarySerializer()
        {
            m_stream = new MemoryStream();
        }

        /// <summary>Creates a serializer over a file opened with the given mode.</summary>
        public BinarySerializer(string file_name, FileMode file_mode)
        {
            m_stream = new FileStream(file_name, file_mode); // throws ArgumentException, NotSupportedException, ArgumentNullException, SecurityException, FileNotFoundException, IOException, DirectoryNotFoundException, PathTooLongException, ArgumentOutOfRangeException
        }

        // *** Reading ***

        // Reads exactly 'count' bytes. Stream.Read may return fewer bytes than requested,
        // so it is called repeatedly; a premature end of stream is reported instead of
        // silently handing back a partially filled buffer.
        private byte[] ReadBytes(int count)
        {
            byte[] buffer = new byte[count];
            int offset = 0;
            while (offset < count)
            {
                int num_bytes = m_stream.Read(buffer, offset, count - offset); // throws IOException, NotSupportedException, ObjectDisposedException
                if (num_bytes <= 0) { throw new EndOfStreamException(); }
                offset += num_bytes;
            }
            return buffer;
        }

        public bool ReadBool()
        {
            return ReadByte() != 0;
        }

        public byte ReadByte() // ReadByte() is directly or indirectly called from several methods thus exceptions thrown here can also be thrown in all those methods
        {
            int val = m_stream.ReadByte(); // throws NotSupportedException, ObjectDisposedException
            // Stream.ReadByte signals end of stream with -1; casting that would yield 255.
            if (val < 0) { throw new EndOfStreamException(); }
            return (byte)val;
        }

        public sbyte ReadSByte()
        {
            return (sbyte)ReadByte();
        }

        private char ReadChar8()
        {
            return (char)ReadByte();
        }

        private char ReadChar16()
        {
            return BitConverter.ToChar(ReadBytes(sizeof(ushort)), 0);
        }

        public char ReadChar()
        {
            return ReadChar16();
        }

        public double ReadDouble()
        {
            return BitConverter.ToDouble(ReadBytes(sizeof(double)), 0);
        }

        public float ReadFloat()
        {
            return BitConverter.ToSingle(ReadBytes(sizeof(float)), 0);
        }

        public int ReadInt()
        {
            return BitConverter.ToInt32(ReadBytes(sizeof(int)), 0);
        }

        public uint ReadUInt()
        {
            return BitConverter.ToUInt32(ReadBytes(sizeof(uint)), 0);
        }

        public long ReadLong()
        {
            return BitConverter.ToInt64(ReadBytes(sizeof(long)), 0);
        }

        public ulong ReadULong()
        {
            return BitConverter.ToUInt64(ReadBytes(sizeof(ulong)), 0);
        }

        public short ReadShort()
        {
            return BitConverter.ToInt16(ReadBytes(sizeof(short)), 0);
        }

        public ushort ReadUShort()
        {
            return BitConverter.ToUInt16(ReadBytes(sizeof(ushort)), 0);
        }

        // Length-prefixed ASCII string; a negative length encodes null.
        private string ReadString8()
        {
            int len = ReadInt();
            if (len < 0) { return null; }
            return Encoding.ASCII.GetString(ReadBytes(len));
        }

        // Length-prefixed UTF-16 string (length counted in chars); a negative length encodes null.
        private string ReadString16()
        {
            int len = ReadInt();
            if (len < 0) { return null; }
            return Encoding.Unicode.GetString(ReadBytes(checked(len * 2)));
        }

        public string ReadString()
        {
            return ReadString16(); // throws exceptions (see ReadString16())
        }

        /// <summary>Reads a type name and resolves it; returns null when the type cannot be found.</summary>
        /// <remarks>NOTE(review): the type name comes from the stream, so only read data from trusted sources.</remarks>
        public Type ReadType()
        {
            string type_name = ReadString8(); // throws exceptions (see ReadString8())
            if (type_name == null) { throw new InvalidDataException(); }
            return Type.GetType(GetFullTypeName(type_name)); // throws TargetInvocationException, ArgumentException, TypeLoadException, FileNotFoundException, FileLoadException, BadImageFormatException
        }

        // Instantiates 'type' through its public constructor taking a BinarySerializer.
        private object CreateFromStream(Type type)
        {
            ConstructorInfo cxtor = type.GetConstructor(new Type[] { typeof(Latino.BinarySerializer) });
            if (cxtor == null)
            {
                throw new ArgumentException("Type " + type + " has no public constructor that takes a Latino.BinarySerializer.", "type");
            }
            return cxtor.Invoke(new object[] { this }); // throws MemberAccessException, MethodAccessException, TargetInvocationException, NotSupportedException, SecurityException
        }

        /// <summary>Reads a value of the given primitive or <see cref="ISerializable"/> value type.</summary>
        /// <exception cref="ArgumentException"><paramref name="type"/> is null or not supported.</exception>
        public ValueType ReadValue(Type type)
        {
            if (type == null) { throw new ArgumentNullException("type"); }
            if (type == typeof(bool)) { return ReadBool(); }
            if (type == typeof(byte)) { return ReadByte(); }
            if (type == typeof(sbyte)) { return ReadSByte(); }
            if (type == typeof(char)) { return ReadChar(); }
            if (type == typeof(double)) { return ReadDouble(); }
            if (type == typeof(float)) { return ReadFloat(); }
            if (type == typeof(int)) { return ReadInt(); }
            if (type == typeof(uint)) { return ReadUInt(); }
            if (type == typeof(long)) { return ReadLong(); }
            if (type == typeof(ulong)) { return ReadULong(); }
            if (type == typeof(short)) { return ReadShort(); }
            if (type == typeof(ushort)) { return ReadUShort(); }
            if (typeof(Latino.ISerializable).IsAssignableFrom(type))
            {
                return (ValueType)CreateFromStream(type);
            }
            throw new ArgumentException("Unsupported value type: " + type, "type");
        }

        public T ReadValue<T>()
        {
            return (T)(object)ReadValue(typeof(T)); // throws exceptions (see ReadValue(Type type))
        }

        /// <summary>
        /// Reads an object written by <see cref="WriteObject(Type, object)"/>. The leading marker
        /// byte is 0 for null, 1 when the object has exactly the given type and 2 when the
        /// actual type name follows in the stream.
        /// </summary>
        public object ReadObject(Type type)
        {
            switch (ReadByte())
            {
                case 0:
                    return null;
                case 1:
                    if (type == null) { throw new ArgumentNullException("type"); }
                    break;
                case 2:
                    type = ReadType(); // throws exceptions (see ReadType())
                    if (type == null) { throw new TypeLoadException(); }
                    break;
                default:
                    throw new InvalidDataException();
            }
            if (type == typeof(string))
            {
                return ReadString();
            }
            if (typeof(Latino.ISerializable).IsAssignableFrom(type))
            {
                return CreateFromStream(type);
            }
            if (type.IsValueType)
            {
                return ReadValue(type); // throws exceptions (see ReadValue(Type type))
            }
            throw new ArgumentException("Unsupported object type: " + type, "type");
        }

        public T ReadObject<T>()
        {
            return (T)ReadObject(typeof(T)); // throws exceptions (see ReadObject(Type type))
        }

        public object ReadValueOrObject(Type type)
        {
            if (type == null) { throw new ArgumentNullException("type"); }
            return type.IsValueType
                ? ReadValue(type)    // throws exceptions (see ReadValue(Type type))
                : ReadObject(type);  // throws exceptions (see ReadObject(Type type))
        }

        public T ReadValueOrObject<T>()
        {
            return (T)ReadValueOrObject(typeof(T)); // throws exceptions (see ReadValueOrObject(Type type))
        }

        // *** Writing ***

        private void Write(byte[] data) // Write(byte[] data) is directly or indirectly called from several methods thus exceptions thrown here can also be thrown in all those methods
        {
            m_stream.Write(data, 0, data.Length); // throws IOException, NotSupportedException, ObjectDisposedException
        }

        public void WriteBool(bool val)
        {
            WriteByte(val ? (byte)1 : (byte)0);
        }

        public void WriteByte(byte val) // WriteByte(byte val) is directly or indirectly called from several methods thus exceptions thrown here can also be thrown in all those methods
        {
            m_stream.WriteByte(val); // throws IOException, NotSupportedException, ObjectDisposedException
        }

        public void WriteSByte(sbyte val)
        {
            WriteByte((byte)val);
        }

        private void WriteChar8(char val)
        {
            WriteByte(Encoding.ASCII.GetBytes(new char[] { val })[0]);
        }

        private void WriteChar16(char val)
        {
            Write(BitConverter.GetBytes((ushort)val));
        }

        public void WriteChar(char val)
        {
            WriteChar16(val);
        }

        public void WriteDouble(double val)
        {
            Write(BitConverter.GetBytes(val));
        }

        public void WriteFloat(float val)
        {
            Write(BitConverter.GetBytes(val));
        }

        public void WriteInt(int val)
        {
            Write(BitConverter.GetBytes(val));
        }

        public void WriteUInt(uint val)
        {
            Write(BitConverter.GetBytes(val));
        }

        public void WriteLong(long val)
        {
            Write(BitConverter.GetBytes(val));
        }

        public void WriteULong(ulong val)
        {
            Write(BitConverter.GetBytes(val));
        }

        public void WriteShort(short val)
        {
            Write(BitConverter.GetBytes(val));
        }

        public void WriteUShort(ushort val)
        {
            Write(BitConverter.GetBytes(val));
        }

        // Length-prefixed ASCII string; null is written as length -1.
        private void WriteString8(string val)
        {
            if (val == null) { WriteInt(-1); return; }
            WriteInt(val.Length);
            Write(Encoding.ASCII.GetBytes(val));
        }

        // Length-prefixed UTF-16 string (length counted in chars); null is written as length -1.
        private void WriteString16(string val)
        {
            if (val == null) { WriteInt(-1); return; }
            WriteInt(val.Length);
            Write(Encoding.Unicode.GetBytes(val));
        }

        public void WriteString(string val)
        {
            WriteString16(val);
        }

        /// <summary>Writes a primitive value or an <see cref="ISerializable"/> value type.</summary>
        /// <exception cref="ArgumentException">The value is null or of an unsupported type.</exception>
        public void WriteValue(ValueType val)
        {
            if (val is bool) { WriteBool((bool)val); }
            else if (val is byte) { WriteByte((byte)val); }
            else if (val is sbyte) { WriteSByte((sbyte)val); }
            else if (val is char) { WriteChar((char)val); }
            else if (val is double) { WriteDouble((double)val); }
            else if (val is float) { WriteFloat((float)val); }
            else if (val is int) { WriteInt((int)val); }
            else if (val is uint) { WriteUInt((uint)val); }
            else if (val is long) { WriteLong((long)val); }
            else if (val is ulong) { WriteULong((ulong)val); }
            else if (val is short) { WriteShort((short)val); }
            else if (val is ushort) { WriteUShort((ushort)val); }
            else if (val is Latino.ISerializable)
            {
                ((Latino.ISerializable)val).Save(this); // throws serialization-related exceptions
            }
            else
            {
                // Writing nothing would desynchronize the stream for the reader.
                throw new ArgumentException("Unsupported value type: " + (val == null ? "null" : val.GetType().ToString()), "val");
            }
        }

        /// <summary>
        /// Writes a marker byte (0 = null, 1 = exactly <paramref name="type"/>, 2 = other type,
        /// followed by its name) and then the object itself.
        /// </summary>
        /// <exception cref="ArgumentException">The object is neither a string, an <see cref="ISerializable"/> nor a value type.</exception>
        public void WriteObject(Type type, object obj)
        {
            if (obj == null)
            {
                WriteByte(0);
                return;
            }
            // Check before emitting the marker so that a rejected object leaves no
            // dangling marker/type name in the stream.
            if (!(obj is string) && !(obj is Latino.ISerializable) && !(obj is ValueType))
            {
                throw new ArgumentException("Unsupported object type: " + obj.GetType(), "obj");
            }
            Type obj_type = obj.GetType();
            if (obj_type == type)
            {
                WriteByte(1);
            }
            else
            {
                WriteByte(2);
                WriteType(obj_type);
            }
            if (obj is string)
            {
                WriteString((string)obj);
            }
            else if (obj is Latino.ISerializable)
            {
                ((Latino.ISerializable)obj).Save(this); // throws serialization-related exceptions
            }
            else
            {
                WriteValue((ValueType)obj); // throws exceptions (see WriteValue(ValueType val))
            }
        }

        public void WriteObject<T>(T obj)
        {
            WriteObject(typeof(T), obj); // throws exceptions (see WriteObject(Type type, object obj))
        }

        public void WriteValueOrObject(Type type, object obj)
        {
            if (type == null) { throw new ArgumentNullException("type"); }
            if (type.IsValueType)
            {
                WriteValue((ValueType)obj); // throws exceptions (see WriteValue(ValueType val))
            }
            else
            {
                WriteObject(type, obj); // throws exceptions (see WriteObject(Type type, object obj))
            }
        }

        public void WriteValueOrObject<T>(T obj)
        {
            WriteValueOrObject(typeof(T), obj); // throws exceptions (see WriteValueOrObject(Type type, object obj))
        }

        /// <summary>Writes the registered short alias of the type, or its assembly-qualified name.</summary>
        public void WriteType(Type type)
        {
            if (type == null) { throw new ArgumentNullException("type"); }
            WriteString8(GetShortTypeName(type.AssemblyQualifiedName));
        }

        // *** Data directory ***

        public string DataDir
        {
            get { return m_data_dir; }
            set
            {
                // No validation of the path is performed here.
                m_data_dir = value;
            }
        }

        // *** Access to the associated stream ***

        public void Close()
        {
            m_stream.Close();
        }

        public void Flush()
        {
            m_stream.Flush(); // throws IOException
        }

        public Stream Stream
        {
            get { return m_stream; }
        }
    }
}
#endif

@ -1,165 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProductVersion>9.0.21022</ProductVersion>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{A39293C1-92D8-47B9-93A4-41F443B4F9E4}</ProjectGuid>
<OutputType>Library</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>LemmaSharp</RootNamespace>
<AssemblyName>LemmaSharp</AssemblyName>
<TargetFrameworkVersion>v4.7</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<IsWebBootstrapper>true</IsWebBootstrapper>
<StartupObject>
</StartupObject>
<FileUpgradeFlags>
</FileUpgradeFlags>
<UpgradeBackupLocation>
</UpgradeBackupLocation>
<OldToolsVersion>3.5</OldToolsVersion>
<TargetFrameworkProfile />
<PublishUrl>http://localhost/LemmaSharp/</PublishUrl>
<Install>true</Install>
<InstallFrom>Web</InstallFrom>
<UpdateEnabled>true</UpdateEnabled>
<UpdateMode>Foreground</UpdateMode>
<UpdateInterval>7</UpdateInterval>
<UpdateIntervalUnits>Days</UpdateIntervalUnits>
<UpdatePeriodically>false</UpdatePeriodically>
<UpdateRequired>false</UpdateRequired>
<MapFileExtensions>true</MapFileExtensions>
<ApplicationRevision>0</ApplicationRevision>
<ApplicationVersion>1.0.0.%2a</ApplicationVersion>
<UseApplicationTrust>false</UseApplicationTrust>
<BootstrapperEnabled>true</BootstrapperEnabled>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>TRACE;DEBUG;NOLATINO</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE;NOLATINO</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x86\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x86</PlatformTarget>
<CodeAnalysisUseTypeNameInSuppression>true</CodeAnalysisUseTypeNameInSuppression>
<CodeAnalysisModuleSuppressionsFile>GlobalSuppressions.cs</CodeAnalysisModuleSuppressionsFile>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
<OutputPath>bin\x86\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x86</PlatformTarget>
<CodeAnalysisUseTypeNameInSuppression>true</CodeAnalysisUseTypeNameInSuppression>
<CodeAnalysisModuleSuppressionsFile>GlobalSuppressions.cs</CodeAnalysisModuleSuppressionsFile>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x64\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x64</PlatformTarget>
<CodeAnalysisUseTypeNameInSuppression>true</CodeAnalysisUseTypeNameInSuppression>
<CodeAnalysisModuleSuppressionsFile>GlobalSuppressions.cs</CodeAnalysisModuleSuppressionsFile>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
<OutputPath>bin\x64\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x64</PlatformTarget>
<CodeAnalysisUseTypeNameInSuppression>true</CodeAnalysisUseTypeNameInSuppression>
<CodeAnalysisModuleSuppressionsFile>GlobalSuppressions.cs</CodeAnalysisModuleSuppressionsFile>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<ItemGroup>
<Reference Include="Lzma#, Version=4.12.3884.11200, Culture=neutral, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>ExternalLibs\Lzma#.dll</HintPath>
</Reference>
<Reference Include="System" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="LatinoCompatibility\BinarySerializer.cs">
<SubType>Code</SubType>
</Compile>
<Compile Include="Interfaces\ILemmatizer.cs" />
<Compile Include="Interfaces\ILemmatizerModel.cs" />
<Compile Include="Interfaces\ILemmatizerTrainable.cs" />
<Compile Include="Classes\LemmatizerSettings.cs" />
<Compile Include="Classes\LemmaRule.cs" />
<Compile Include="Classes\Lemmatizer.cs" />
<Compile Include="Classes\LemmaTreeNode.cs" />
<Compile Include="Classes\LemmaExample.cs" />
<Compile Include="Classes\ExampleList.cs" />
<Compile Include="Classes\RuleList.cs" />
<Compile Include="Classes\RuleWeighted.cs" />
</ItemGroup>
<ItemGroup>
<BootstrapperPackage Include="Microsoft.Net.Client.3.5">
<Visible>False</Visible>
<ProductName>.NET Framework Client Profile</ProductName>
<Install>false</Install>
</BootstrapperPackage>
<BootstrapperPackage Include="Microsoft.Net.Framework.2.0">
<Visible>False</Visible>
<ProductName>.NET Framework 2.0 %28x86%29</ProductName>
<Install>true</Install>
</BootstrapperPackage>
<BootstrapperPackage Include="Microsoft.Net.Framework.3.0">
<Visible>False</Visible>
<ProductName>.NET Framework 3.0 %28x86%29</ProductName>
<Install>false</Install>
</BootstrapperPackage>
<BootstrapperPackage Include="Microsoft.Net.Framework.3.5">
<Visible>False</Visible>
<ProductName>.NET Framework 3.5</ProductName>
<Install>false</Install>
</BootstrapperPackage>
<BootstrapperPackage Include="Microsoft.Net.Framework.3.5.SP1">
<Visible>False</Visible>
<ProductName>.NET Framework 3.5 SP1</ProductName>
<Install>false</Install>
</BootstrapperPackage>
</ItemGroup>
<ItemGroup>
<Folder Include="Properties\" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>

@ -1,34 +0,0 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.25420.1
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharp", "LemmaSharp\LemmaSharp.csproj", "{A39293C1-92D8-47B9-93A4-41F443B4F9E4}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|Any CPU = Release|Any CPU
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x64.ActiveCfg = Debug|x64
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x64.Build.0 = Debug|x64
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x86.ActiveCfg = Debug|x86
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x86.Build.0 = Debug|x86
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|Any CPU.Build.0 = Release|Any CPU
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x64.ActiveCfg = Release|x64
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x64.Build.0 = Release|x64
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x86.ActiveCfg = Release|x86
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x86.Build.0 = Release|x86
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

@ -1,381 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.Serialization;
using System.Text;
namespace LemmaSharp
{
[Serializable]
public class ExampleList : ISerializable
{
#region Private Variables
private LemmatizerSettings lsett;
private RuleList rlRules;
private Dictionary<string, LemmaExample> dictExamples;
private List<LemmaExample> lstExamples;
#endregion
#region Constructor(s)
/// <summary>
/// Creates an empty example list that uses the given settings and a fresh rule list.
/// </summary>
/// <param name="lsett">Settings shared with the rule list and with every example added.</param>
public ExampleList(LemmatizerSettings lsett)
{
    this.lsett = lsett;
    dictExamples = new Dictionary<string, LemmaExample>();
    lstExamples = null; // the sorted list is built lazily by FinalizeAdditions
    rlRules = new RuleList(lsett);
}

/// <summary>
/// Creates an example list and fills it from a tab-separated file.
/// </summary>
/// <param name="srIn">Reader positioned at the first example line.</param>
/// <param name="sFormat">Column format specification, as accepted by AddMultextFile.</param>
/// <param name="lsett">Settings shared with the rule list and with every example added.</param>
public ExampleList(StreamReader srIn, string sFormat, LemmatizerSettings lsett)
    : this(lsett)
{
    AddMultextFile(srIn, sFormat);
}
#endregion
#region Public Properties & Indexers
/// <summary>Gets the i-th example of the sorted list, building the list first if needed.</summary>
public LemmaExample this[int i] => ListExamples[i];

/// <summary>Gets the number of examples in the sorted list, building the list first if needed.</summary>
public int Count => ListExamples.Count;

/// <summary>Gets the sum of the weights of all examples.</summary>
public double WeightSum
{
    get
    {
        double total = 0;
        foreach (LemmaExample example in ListExamples)
        {
            total += example.Weight;
        }
        return total;
    }
}

/// <summary>Gets the rule list shared by the examples of this list.</summary>
public RuleList Rules => rlRules;

/// <summary>Gets the sorted list of examples, building it first if needed.</summary>
public List<LemmaExample> ListExamples
{
    get
    {
        // FinalizeAdditions returns immediately when the list is already built.
        FinalizeAdditions();
        return lstExamples;
    }
}
#endregion
#region Essential Class Functions (adding/removing examples)
/// <summary>
/// Reads tab-separated examples from <paramref name="srIn"/> and adds each one to the list.
/// </summary>
/// <param name="srIn">Reader supplying one example per line, columns separated by tabs.</param>
/// <param name="sFormat">
/// Column layout: the position of 'W' in the string is the word column, 'L' the lemma
/// column, and the optional 'M' and 'F' the MSD and weight columns.
/// </param>
/// <exception cref="Exception">
/// The format has no 'W' or 'L', or 50 lines had fewer columns than the format requires.
/// </exception>
/// <remarks>
/// A lemma of "=" means "same as the word". A weight that does not parse becomes 0,
/// the value Double.TryParse assigns on failure.
/// </remarks>
public void AddMultextFile(StreamReader srIn, string sFormat)
{
    const int maxErrors = 50;

    int iW = sFormat.IndexOf('W');
    int iL = sFormat.IndexOf('L');
    int iM = sFormat.IndexOf('M');
    int iF = sFormat.IndexOf('F');
    if (iW < 0 || iL < 0)
    {
        throw new Exception("Can not find word and lemma location in the format specification");
    }

    // Minimum number of columns a line needs to satisfy the format.
    int iLen = Math.Max(Math.Max(iW, iL), Math.Max(iM, iF)) + 1;

    int iError = 0;
    string sLine;
    while ((sLine = srIn.ReadLine()) != null && iError < maxErrors)
    {
        string[] asWords = sLine.Split(new char[] { '\t' });
        if (asWords.Length < iLen)
        {
            // Line does not conform to the format; count it and move on.
            iError++;
            continue;
        }

        string sWord = asWords[iW];
        string sLemma = asWords[iL];
        if (sLemma.Equals("=", StringComparison.Ordinal))
        {
            sLemma = sWord;
        }

        string sMsd = iM > -1 ? asWords[iM] : null;

        double dWeight = 1;
        if (iF > -1)
        {
            // Fix: the weight lives in the 'F' column; the original read the 'M' column here.
            Double.TryParse(asWords[iF], out dWeight);
        }

        AddExample(sWord, sLemma, dWeight, sMsd);
    }

    if (iError == maxErrors)
    {
        throw new Exception("Parsing stopped because of too many (50) errors. Check format specification");
    }
}
/// <summary>
/// Creates an example from the given values and adds it to the list, merging it
/// with an existing example that has the same signature.
/// </summary>
/// <param name="sWord">Word form.</param>
/// <param name="sLemma">Lemma of the word.</param>
/// <param name="dWeight">Weight of the example.</param>
/// <param name="sMsd">MSD tag; dropped when the settings say MSDs are ignored.</param>
/// <returns>The example stored in the list (new, or the existing one it was joined into).</returns>
public LemmaExample AddExample(string sWord, string sLemma, double dWeight, string sMsd)
{
    string effectiveMsd = null;
    if (lsett.eMsdConsider != LemmatizerSettings.MsdConsideration.Ignore)
    {
        effectiveMsd = sMsd;
    }

    return Add(new LemmaExample(sWord, sLemma, dWeight, effectiveMsd, rlRules, lsett));
}
/// <summary>
/// Stores <paramref name="leNew"/> under its signature, or joins it into the example
/// already stored under that signature. Invalidates the cached sorted list.
/// </summary>
/// <returns>The example now stored under the signature.</returns>
private LemmaExample Add(LemmaExample leNew)
{
    LemmaExample stored;
    if (dictExamples.TryGetValue(leNew.Signature, out stored))
    {
        stored.Join(leNew);
    }
    else
    {
        stored = leNew;
        dictExamples.Add(leNew.Signature, leNew);
    }

    lstExamples = null; // force a rebuild on next access
    return stored;
}
/// <summary>Removes all examples and discards the cached sorted list.</summary>
public void DropExamples()
{
    dictExamples.Clear();
    lstExamples = null;
}

/// <summary>
/// Builds the sorted example list from the dictionary values if it is not already
/// built. Sorting uses the default comparer, i.e. LemmaExample's IComparable ordering.
/// </summary>
public void FinalizeAdditions()
{
    if (lstExamples == null)
    {
        lstExamples = new List<LemmaExample>(dictExamples.Values);
        lstExamples.Sort();
    }
}
/// <summary>
/// Builds a new, finalized example list from the front or the rear word/lemma pair
/// of every example in this list, keeping each example's weight and MSD.
/// </summary>
/// <param name="front">True to use WordFront/LemmaFront, false to use WordRear/LemmaRear.</param>
/// <returns>The derived example list, created with the same settings.</returns>
public ExampleList GetFrontRearExampleList(bool front)
{
    var derived = new ExampleList(lsett);
    foreach (LemmaExample example in ListExamples)
    {
        string word = front ? example.WordFront : example.WordRear;
        string lemma = front ? example.LemmaFront : example.LemmaRear;
        derived.AddExample(word, lemma, example.Weight, example.Msd);
    }

    derived.FinalizeAdditions();
    return derived;
}
#endregion
#region Output Functions (ToString)
/// <summary>
/// Returns the string form of every example, one per line, in sorted order.
/// </summary>
/// <remarks>
/// Reads the examples through ListExamples so the sorted list is built on demand.
/// The raw lstExamples field is null after any addition or DropExamples, and
/// iterating it directly threw NullReferenceException.
/// </remarks>
public override string ToString()
{
    var sb = new StringBuilder();
    foreach (var exm in ListExamples)
    {
        sb.AppendLine(exm.ToString());
    }
    return sb.ToString();
}
#endregion
#region Serialization Functions (.Net Default - ISerializable)
/// <summary>
/// Stores the settings, the example count, and four parallel arrays (words, lemmas,
/// weights, MSDs) describing every example in the dictionary.
/// </summary>
/// <param name="info">Serialization store that receives the values.</param>
/// <param name="context">Not used.</param>
public void GetObjectData(SerializationInfo info, StreamingContext context)
{
    info.AddValue("lsett", lsett);
    info.AddValue("iNumExamples", dictExamples.Count);

    int total = dictExamples.Count;
    var words = new string[total];
    var lemmas = new string[total];
    var weights = new double[total];
    var msds = new string[total];

    int index = 0;
    foreach (LemmaExample example in dictExamples.Values)
    {
        words[index] = example.Word;
        lemmas[index] = example.Lemma;
        weights[index] = example.Weight;
        msds[index] = example.Msd;
        index++;
    }

    info.AddValue("aWords", words);
    info.AddValue("aLemmas", lemmas);
    info.AddValue("aWeights", weights);
    info.AddValue("aMsds", msds);
}
/// <summary>
/// Deserialization constructor: restores the settings, then re-adds every example
/// from the stored parallel arrays, which rebuilds the rules and signatures.
/// </summary>
/// <param name="info">Serialization store written by GetObjectData.</param>
/// <param name="context">Not used.</param>
public ExampleList(SerializationInfo info, StreamingContext context)
    : this((LemmatizerSettings)info.GetValue("lsett", typeof(LemmatizerSettings)))
{
    var words = (string[])info.GetValue("aWords", typeof(string[]));
    var lemmas = (string[])info.GetValue("aLemmas", typeof(string[]));
    var weights = (double[])info.GetValue("aWeights", typeof(double[]));
    var msds = (string[])info.GetValue("aMsds", typeof(string[]));

    for (int i = 0; i < words.Length; i++)
    {
        AddExample(words[i], lemmas[i], weights[i], msds[i]);
    }
}
#endregion
#region Serialization Functions (Binary)
/// <summary>
/// Writes this list in the binary format read by Deserialize.
/// </summary>
/// <param name="binWrt">Destination writer.</param>
/// <param name="bSerializeExamples">
/// When false, an empty example section is written (flag false, count 0).
/// </param>
/// <param name="bThisTopObject">
/// When true, the settings are written too; otherwise the reader must supply them.
/// </param>
/// <remarks>
/// Layout: top-object flag, optional settings, rule list, "sorted list exists" flag,
/// example count, then for each example its rule signature followed by the example.
/// </remarks>
public void Serialize(BinaryWriter binWrt, bool bSerializeExamples, bool bThisTopObject)
{
    // metadata
    binWrt.Write(bThisTopObject);

    // reference types, only when this is the top object
    if (bThisTopObject)
    {
        lsett.Serialize(binWrt);
    }

    rlRules.Serialize(binWrt, false);

    if (!bSerializeExamples)
    {
        binWrt.Write(false); // as if lstExamples == null
        binWrt.Write(0);     // as if dictExamples.Count == 0
        return;
    }

    // Examples come from the sorted list when it exists, otherwise from the dictionary.
    bool hasList = lstExamples != null;
    binWrt.Write(hasList);
    binWrt.Write(hasList ? lstExamples.Count : dictExamples.Count);

    IEnumerable<LemmaExample> examples = hasList
        ? (IEnumerable<LemmaExample>)lstExamples
        : dictExamples.Values;
    foreach (LemmaExample example in examples)
    {
        binWrt.Write(example.Rule.Signature);
        example.Serialize(binWrt, false);
    }
}
/// <summary>
/// Replaces the content of this list with data read in the format written by Serialize.
/// </summary>
/// <param name="binRead">Source reader.</param>
/// <param name="lsett">
/// Settings to use when the stream was not written as a top object; ignored otherwise.
/// </param>
public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett)
{
    // metadata
    bool isTopObject = binRead.ReadBoolean();

    // reference types: own settings if stored, otherwise the caller's
    this.lsett = isTopObject ? new LemmatizerSettings(binRead) : lsett;
    rlRules = new RuleList(binRead, this.lsett);

    bool hasList = binRead.ReadBoolean();
    lstExamples = hasList ? new List<LemmaExample>() : null;
    dictExamples = new Dictionary<string, LemmaExample>();

    // examples: each is preceded by the signature of its rule
    int count = binRead.ReadInt32();
    for (int n = 0; n < count; n++)
    {
        LemmaRule rule = rlRules[binRead.ReadString()];
        var example = new LemmaExample(binRead, this.lsett, rule);
        dictExamples.Add(example.Signature, example);
        if (hasList)
        {
            lstExamples.Add(example);
        }
    }
}

/// <summary>Creates an example list by deserializing it from <paramref name="binRead"/>.</summary>
/// <param name="binRead">Source reader.</param>
/// <param name="lsett">Settings to use when the stream does not carry its own.</param>
public ExampleList(BinaryReader binRead, LemmatizerSettings lsett)
{
    Deserialize(binRead, lsett);
}
#endregion
#region Serialization Functions (Latino)
#if LATINO
// Everything in this section is compiled only when the LATINO symbol is defined.
// The three members mirror Serialize / Deserialize / the BinaryReader constructor,
// using Latino.BinarySerializer instead of BinaryWriter / BinaryReader.

// Writes this list: top-object flag, optional settings, rule list, "sorted list
// exists" flag, example count, then each example preceded by its rule signature.
public void Save(Latino.BinarySerializer binWrt, bool bSerializeExamples, bool bThisTopObject) {
//save metadata
binWrt.WriteBool(bThisTopObject);
//save reference types if needed -------------------------
if (bThisTopObject)
lsett.Save(binWrt);
rlRules.Save(binWrt, false);
if (!bSerializeExamples) {
binWrt.WriteBool(false); // lstExamples == null
binWrt.WriteInt(0); // dictExamples.Count == 0
}
else {
if (lstExamples == null) {
binWrt.WriteBool(false); // lstExamples == null
//save dictionary items
int iCount = dictExamples.Count;
binWrt.WriteInt(iCount);
foreach (KeyValuePair<string, LemmaExample> kvp in dictExamples) {
binWrt.WriteString(kvp.Value.Rule.Signature);
kvp.Value.Save(binWrt, false);
}
}
else {
binWrt.WriteBool(true); // lstExamples != null
//save list & dictionary items
int iCount = lstExamples.Count;
binWrt.WriteInt(iCount);
foreach (LemmaExample le in lstExamples) {
binWrt.WriteString(le.Rule.Signature);
le.Save(binWrt, false);
}
}
}
}
// Reads the data written by Save. The lsett argument is used only when the stream
// was not written as a top object; otherwise the settings are read from the stream.
public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
//load metadata
bool bThisTopObject = binRead.ReadBool();
//load reference types if needed -------------------------
if (bThisTopObject)
this.lsett = new LemmatizerSettings(binRead);
else
this.lsett = lsett;
rlRules = new RuleList(binRead, this.lsett);
bool bCreateLstExamples = binRead.ReadBool();
lstExamples = bCreateLstExamples ? new List<LemmaExample>() : null;
dictExamples = new Dictionary<string, LemmaExample>();
//load dictionary items
int iCount = binRead.ReadInt();
for (int iId = 0; iId < iCount; iId++) {
// each example is preceded by the signature of its rule, looked up in rlRules
LemmaRule lrRule = rlRules[binRead.ReadString()];
LemmaExample le = new LemmaExample(binRead, this.lsett, lrRule);
dictExamples.Add(le.Signature, le);
if (bCreateLstExamples) lstExamples.Add(le);
}
}
// Creates an example list by loading it from a Latino serializer.
public ExampleList(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
Load(binRead, lsett);
}
#endif
#endregion
}
}

@ -1,481 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace LemmaSharp
{
public class LemmaExample : IComparable<LemmaExample>, IComparer<LemmaExample>
{
#region Private Variables
private string sWord;
private string sLemma;
private string sSignature;
private string sMsd;
private double dWeight;
private LemmaRule lrRule;
private LemmatizerSettings lsett;
private string sWordRearCache;
private string sWordFrontCache;
private string sLemmaFrontCache;
#endregion
#region Constructor(s)
/// <summary>
/// Creates an example, registers its rule in <paramref name="rlRules"/> and computes
/// its signature.
/// </summary>
/// <param name="sWord">Word form.</param>
/// <param name="sLemma">Lemma of the word.</param>
/// <param name="dWeight">Weight of the example.</param>
/// <param name="sMsd">MSD tag; may be null.</param>
/// <param name="rlRules">Rule list that supplies (or creates) the rule for this example.</param>
/// <param name="lsett">Settings; eMsdConsider decides whether the MSD is part of the signature.</param>
public LemmaExample(string sWord, string sLemma, double dWeight, string sMsd, RuleList rlRules, LemmatizerSettings lsett)
{
    this.lsett = lsett;
    this.sWord = sWord;
    this.sLemma = sLemma;
    this.sMsd = sMsd;
    this.dWeight = dWeight;

    // The rule is requested after the word/lemma fields are set and before the signature.
    this.lrRule = rlRules.AddRule(this);

    bool msdInSignature;
    switch (lsett.eMsdConsider)
    {
        case LemmatizerSettings.MsdConsideration.Ignore:
        case LemmatizerSettings.MsdConsideration.JoinAll:
        case LemmatizerSettings.MsdConsideration.JoinDistinct:
        case LemmatizerSettings.MsdConsideration.JoinSameSubstring:
            msdInSignature = false;
            break;
        default: // includes MsdConsideration.Distinct
            msdInSignature = true;
            break;
    }

    if (msdInSignature)
    {
        sSignature = string.Format("[{0}]==>[{1}]({2})", sWord, sLemma, sMsd ?? "");
    }
    else
    {
        sSignature = string.Format("[{0}]==>[{1}]", sWord, sLemma);
    }

    // The lazily computed front/rear caches start out null (the field default).
}
#endregion
#region Public Properties
/// <summary>Gets the word form.</summary>
public string Word => sWord;

/// <summary>Gets the lemma.</summary>
public string Lemma => sLemma;

/// <summary>Gets the MSD tag; may be null.</summary>
public string Msd => sMsd;

/// <summary>Gets the signature computed at construction or read during deserialization.</summary>
public string Signature => sSignature;

/// <summary>Gets the weight; Join adds the weight of each joined example.</summary>
public double Weight => dWeight;

/// <summary>Gets the rule associated with this example.</summary>
public LemmaRule Rule => lrRule;
/// <summary>
/// Word to be pre-lemmatized by the front lemmatizer into LemmaFront, which the
/// standard rear lemmatizer then processes. Warning: it is the reversed word.
/// </summary>
public string WordFront
{
    get { return sWordFrontCache ?? (sWordFrontCache = StringReverse(sWord)); }
}

/// <summary>
/// Lemma to be produced by the front lemmatizer. Warning: it is the reversed WordRear.
/// </summary>
public string LemmaFront
{
    get { return sLemmaFrontCache ?? (sLemmaFrontCache = StringReverse(WordRear)); }
}

/// <summary>
/// Word to be lemmatized by the standard rear lemmatizer; its beginning has already
/// been modified by the front lemmatizer.
/// </summary>
/// <remarks>
/// Built from the lemma up to the end of the longest common substring of word and
/// lemma, followed by the rest of the word after that substring. When there is no
/// common substring, the lemma itself is used.
/// </remarks>
public string WordRear
{
    get
    {
        if (sWordRearCache != null)
        {
            return sWordRearCache;
        }

        int wordPos = 0, lemmaPos = 0;
        string common = LongestCommonSubstring(sWord, sLemma, ref wordPos, ref lemmaPos);
        if (lemmaPos == -1)
        {
            sWordRearCache = sLemma;
        }
        else
        {
            string lemmaHead = sLemma.Substring(0, lemmaPos + common.Length);
            string wordTail = sWord.Substring(wordPos + common.Length);
            sWordRearCache = lemmaHead + wordTail;
        }
        return sWordRearCache;
    }
}

/// <summary>
/// Lemma to be produced by the standard rear lemmatizer from WordRear.
/// </summary>
public string LemmaRear => sLemma;
#endregion
#region Essential Class Functions (joining two examples into one)
//TODO - this function is not totally ok because sMsd should not be
//changed since it could be included in the signature
/// <summary>
/// Merges <paramref name="leJoin"/> into this example: the weights are added and,
/// when this example has an MSD, the MSD is combined according to the settings.
/// </summary>
/// <param name="leJoin">Example with the same signature that is being merged in.</param>
/// <remarks>
/// Ignore clears the MSD. Distinct leaves it alone. JoinAll appends "|" and the other
/// MSD. JoinDistinct appends only when the other MSD is not already one of the
/// '|'-separated entries. JoinSameSubstring keeps the common prefix of both MSDs.
/// A null MSD on <paramref name="leJoin"/> is treated as an empty string.
/// </remarks>
public void Join(LemmaExample leJoin)
{
    dWeight += leJoin.dWeight;
    if (sMsd == null)
    {
        return;
    }

    string otherMsd = leJoin.sMsd ?? string.Empty;
    switch (lsett.eMsdConsider)
    {
        case LemmatizerSettings.MsdConsideration.Ignore:
            sMsd = null;
            break;
        case LemmatizerSettings.MsdConsideration.JoinAll:
            sMsd += "|" + otherMsd;
            break;
        case LemmatizerSettings.MsdConsideration.JoinDistinct:
            // Compare against whole entries. The previous substring search for "|msd"
            // never matched the first entry and matched prefixes of longer entries.
            if (Array.IndexOf(sMsd.Split('|'), otherMsd) < 0)
            {
                sMsd += "|" + otherMsd;
            }
            break;
        case LemmatizerSettings.MsdConsideration.JoinSameSubstring:
            int iPos = 0;
            int iMax = Math.Min(sMsd.Length, otherMsd.Length);
            while (iPos < iMax && sMsd[iPos] == otherMsd[iPos])
            {
                iPos++;
            }
            sMsd = sMsd.Substring(0, iPos);
            break;
        case LemmatizerSettings.MsdConsideration.Distinct:
        default:
            break;
    }
}
#endregion
#region Essential Class Functions (calculating similarities betwen examples)
/// <summary>Returns the similarity between this example and <paramref name="le"/>.</summary>
public int Similarity(LemmaExample le) => Similarity(this, le);

/// <summary>
/// Returns the number of characters the two examples' words share at their ends
/// (the length of the common suffix).
/// </summary>
public static int Similarity(LemmaExample le1, LemmaExample le2)
{
    string first = le1.sWord;
    string second = le2.sWord;
    int limit = Math.Min(first.Length, second.Length);

    int matched = 0;
    while (matched < limit
        && first[first.Length - 1 - matched] == second[second.Length - 1 - matched])
    {
        matched++;
    }

    //TODO similarity should be bigger if two words are totally equal
    return matched;
}
#endregion
#region Essential Class Functions (comparing examples - eg.: for sorting)
/// <summary>
/// Compares this example with <paramref name="other"/>; mainly used for sorting
/// lists of examples.
/// </summary>
/// <param name="other">Example to compare against.</param>
/// <returns>1 if this example is bigger, -1 if it is smaller, 0 if both are the same.</returns>
/// <remarks>
/// Words are compared from their last character backwards, then lemmas from the
/// front. MSDs are compared only under MsdConsideration.Distinct and only when both
/// are non-null.
/// </remarks>
public int CompareTo(LemmaExample other)
{
    int result = CompareStrings(sWord, other.sWord, false);
    if (result == 0)
    {
        result = CompareStrings(sLemma, other.sLemma, true);
    }

    if (result == 0
        && lsett.eMsdConsider == LemmatizerSettings.MsdConsideration.Distinct
        && sMsd != null
        && other.sMsd != null)
    {
        result = CompareStrings(sMsd, other.sMsd, true);
    }

    return result;
}

/// <summary>Compares two examples using <paramref name="x"/>'s CompareTo.</summary>
public int Compare(LemmaExample x, LemmaExample y) => x.CompareTo(y);
/// <summary>
/// Compares two strings character by character by char value, either from the
/// front or from the back. When one string runs out first, the longer one is bigger.
/// </summary>
/// <param name="sStr1">First string.</param>
/// <param name="sStr2">Second string.</param>
/// <param name="bForward">True to compare from the first character, false from the last.</param>
/// <returns>1 if <paramref name="sStr1"/> is bigger, -1 if smaller, 0 if equal.</returns>
public static int CompareStrings(string sStr1, string sStr2, bool bForward)
{
    int length1 = sStr1.Length;
    int length2 = sStr2.Length;
    int shared = Math.Min(length1, length2);

    for (int offset = 0; offset < shared; offset++)
    {
        char c1 = bForward ? sStr1[offset] : sStr1[length1 - 1 - offset];
        char c2 = bForward ? sStr2[offset] : sStr2[length2 - 1 - offset];
        if (c1 != c2)
        {
            return c1 > c2 ? 1 : -1;
        }
    }

    if (length1 == length2)
    {
        return 0;
    }
    return length1 > length2 ? 1 : -1;
}
/// <summary>Returns the length of the common prefix of the two strings.</summary>
/// <param name="sStr1">First string.</param>
/// <param name="sStr2">Second string.</param>
public static int EqualPrifixLen(string sStr1, string sStr2)
{
    int limit = Math.Min(sStr1.Length, sStr2.Length);
    int matched = 0;
    while (matched < limit && sStr1[matched] == sStr2[matched])
    {
        matched++;
    }
    return matched;
}
/// <summary>
/// Finds the longest substring that occurs in both strings.
/// </summary>
/// <param name="sStr1">First string.</param>
/// <param name="sStr2">Second string.</param>
/// <param name="iPosInStr1">Receives the start of the match in <paramref name="sStr1"/>, or -1 if none.</param>
/// <param name="iPosInStr2">Receives the start of the match in <paramref name="sStr2"/>, or -1 if none.</param>
/// <returns>
/// The longest common substring, or an empty string when the strings share no
/// character. Among equally long matches, the first one found while scanning
/// <paramref name="sStr1"/> and then <paramref name="sStr2"/> left to right wins.
/// </returns>
public static string LongestCommonSubstring(string sStr1, string sStr2, ref int iPosInStr1, ref int iPosInStr2)
{
    // runs[i + 1, j + 1] is the length of the common run ending at sStr1[i] and sStr2[j].
    var runs = new int[sStr1.Length + 1, sStr2.Length + 1];
    int bestLength = 0;
    int bestStart = -1;
    iPosInStr1 = -1;
    iPosInStr2 = -1;

    for (int i = 0; i < sStr1.Length; i++)
    {
        for (int j = 0; j < sStr2.Length; j++)
        {
            if (sStr1[i] != sStr2[j])
            {
                continue;
            }

            int run = runs[i, j] + 1;
            runs[i + 1, j + 1] = run;
            if (run > bestLength)
            {
                bestLength = run;
                bestStart = i - run + 1;
                iPosInStr1 = bestStart;
                iPosInStr2 = j - run + 1;
            }
        }
    }

    return bestLength == 0 ? "" : sStr1.Substring(bestStart, bestLength);
}
/// <summary>
/// Returns <paramref name="s"/> with its characters in reverse order.
/// </summary>
/// <param name="s">String to reverse; may be null.</param>
/// <returns>The reversed string, or null when <paramref name="s"/> is null.</returns>
/// <remarks>
/// Reversal is per UTF-16 char, so surrogate pairs are not kept together.
/// The previous implementation swapped into an empty buffer starting from the middle
/// index, which left most positions as '\0' (for example "abcd" became "c\0a\0").
/// </remarks>
public static string StringReverse(string s)
{
    if (s == null)
    {
        return null;
    }

    char[] chars = s.ToCharArray();
    Array.Reverse(chars);
    return new string(chars);
}
#endregion
#region Output Functions (ToString)
/// <summary>
/// Returns a one-line description listing the word (W), lemma (L), MSD (M), weight (F)
/// and rule (R), skipping parts that are null (or NaN for the weight).
/// </summary>
public override string ToString()
{
    var text = new StringBuilder();

    if (sWord != null)
    {
        text.AppendFormat("W:\"{0}\" ", sWord);
    }
    if (sLemma != null)
    {
        text.AppendFormat("L:\"{0}\" ", sLemma);
    }
    if (sMsd != null)
    {
        text.AppendFormat("M:\"{0}\" ", sMsd);
    }
    if (!Double.IsNaN(dWeight))
    {
        text.AppendFormat("F:\"{0}\" ", dWeight);
    }
    if (lrRule != null)
    {
        text.AppendFormat("R:{0} ", lrRule);
    }

    // Drop the trailing space left by the last part.
    return text.Length > 0 ? text.ToString(0, text.Length - 1) : string.Empty;
}
#endregion
#region Serialization Functions (Binary)
/// <summary>
/// Writes this example in the binary format read by Deserialize.
/// </summary>
/// <param name="binWrt">Destination writer.</param>
/// <param name="bThisTopObject">
/// When true, the settings and the rule are written after the value fields;
/// otherwise the reader must supply them.
/// </param>
public void Serialize(BinaryWriter binWrt, bool bThisTopObject)
{
    // metadata
    binWrt.Write(bThisTopObject);

    // value types
    binWrt.Write(sWord);
    binWrt.Write(sLemma);
    binWrt.Write(sSignature);

    // The MSD is optional: a presence flag, then the value if present.
    bool hasMsd = sMsd != null;
    binWrt.Write(hasMsd);
    if (hasMsd)
    {
        binWrt.Write(sMsd);
    }

    binWrt.Write(dWeight);

    // reference types, only when this is the top object
    if (!bThisTopObject)
    {
        return;
    }
    lsett.Serialize(binWrt);
    lrRule.Serialize(binWrt, false);
}
/// <summary>
/// Replaces the content of this example with data read in the format written by Serialize.
/// </summary>
/// <param name="binRead">Source reader.</param>
/// <param name="lsett">Settings to use when the stream was not written as a top object.</param>
/// <param name="lrRule">Rule to use when the stream was not written as a top object.</param>
public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, LemmaRule lrRule)
{
    // metadata
    bool isTopObject = binRead.ReadBoolean();

    // value types
    sWord = binRead.ReadString();
    sLemma = binRead.ReadString();
    sSignature = binRead.ReadString();
    sMsd = binRead.ReadBoolean() ? binRead.ReadString() : null;
    dWeight = binRead.ReadDouble();

    // reference types: read from the stream if stored, otherwise taken from the caller
    if (isTopObject)
    {
        this.lsett = new LemmatizerSettings(binRead);
        this.lrRule = new LemmaRule(binRead, this.lsett);
    }
    else
    {
        this.lsett = lsett;
        this.lrRule = lrRule;
    }

    // Derived strings are recomputed on demand.
    sWordRearCache = null;
    sWordFrontCache = null;
    sLemmaFrontCache = null;
}

/// <summary>Creates an example by deserializing it from <paramref name="binRead"/>.</summary>
/// <param name="binRead">Source reader.</param>
/// <param name="lsett">Settings to use when the stream does not carry its own.</param>
/// <param name="lrRule">Rule to use when the stream does not carry its own.</param>
public LemmaExample(BinaryReader binRead, LemmatizerSettings lsett, LemmaRule lrRule)
{
    Deserialize(binRead, lsett, lrRule);
}
#endregion
#region Serialization Functions (Latino)
#if LATINO
// Everything in this section is compiled only when the LATINO symbol is defined.
// The three members mirror Serialize / Deserialize / the BinaryReader constructor,
// using Latino.BinarySerializer instead of BinaryWriter / BinaryReader.

// Writes this example: top-object flag, word, lemma, signature, optional MSD
// (presence flag + value), weight, and - for a top object - settings and rule.
public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject) {
//save metadata
binWrt.WriteBool(bThisTopObject);
//save value types --------------------------------------
binWrt.WriteString(sWord);
binWrt.WriteString(sLemma);
binWrt.WriteString(sSignature);
if (sMsd == null)
binWrt.WriteBool(false);
else {
binWrt.WriteBool(true);
binWrt.WriteString(sMsd);
}
binWrt.WriteDouble(dWeight);
//save reference types if needed -------------------------
if (bThisTopObject) {
lsett.Save(binWrt);
lrRule.Save(binWrt, false);
}
}
// Reads the data written by Save. The lsett and lrRule arguments are used only when
// the stream was not written as a top object.
// NOTE(review): unlike Deserialize, this does not reset the front/rear cache fields;
// harmless for the constructor below, but confirm if Load is called on a used instance.
public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett, LemmaRule lrRule) {
//load metadata
bool bThisTopObject = binRead.ReadBool();
//load value types --------------------------------------
sWord = binRead.ReadString();
sLemma = binRead.ReadString();
sSignature = binRead.ReadString();
if (binRead.ReadBool())
sMsd = binRead.ReadString();
else
sMsd = null;
dWeight = binRead.ReadDouble();
//load reference types if needed -------------------------
if (bThisTopObject) {
this.lsett = new LemmatizerSettings(binRead);
this.lrRule = new LemmaRule(binRead, this.lsett);
}
else {
this.lsett = lsett;
this.lrRule = lrRule;
}
}
// Creates an example by loading it from a Latino serializer.
public LemmaExample(Latino.BinarySerializer binRead, LemmatizerSettings lsett, LemmaRule lrRule) {
Load(binRead, lsett, lrRule);
}
#endif
#endregion
}
}

@ -1,189 +0,0 @@
using System;
using System.IO;
namespace LemmaSharp
{
public class LemmaRule
{
#region Private Variables
private int iId;
private int iFrom;
private string sFrom;
private string sTo;
private string sSignature;
private LemmatizerSettings lsett;
#endregion
#region Constructor(s)
/// <summary>
/// Derives the rule that turns <paramref name="sWord"/> into <paramref name="sLemma"/>:
/// the word's ending after the common prefix is replaced by the lemma's ending.
/// </summary>
/// <param name="sWord">Word form.</param>
/// <param name="sLemma">Lemma of the word.</param>
/// <param name="iId">Identifier assigned to this rule.</param>
/// <param name="lsett">
/// Settings; bUseFromInRules decides whether the replaced ending itself, or only its
/// length, is kept and used in the signature.
/// </param>
public LemmaRule(string sWord, string sLemma, int iId, LemmatizerSettings lsett)
{
    this.lsett = lsett;
    this.iId = iId;

    int stemLength = SameStem(sWord, sLemma);
    sTo = sLemma.Substring(stemLength);
    iFrom = sWord.Length - stemLength;

    if (!lsett.bUseFromInRules)
    {
        sFrom = null;
        sSignature = string.Format("[#{0}]==>[{1}]", iFrom, sTo);
        return;
    }

    sFrom = sWord.Substring(stemLength);
    sSignature = string.Format("[{0}]==>[{1}]", sFrom, sTo);
}
#endregion
#region Public Properties
/// <summary>Gets the rule's signature string.</summary>
public string Signature => sSignature;

/// <summary>Gets the rule's identifier.</summary>
public int Id => iId;
#endregion
#region Essential Class Functions
/// <summary>Returns the length of the common prefix (stem) of the two strings.</summary>
private static int SameStem(string sStr1, string sStr2)
{
    int limit = Math.Min(sStr1.Length, sStr2.Length);
    int matched = 0;
    while (matched < limit && sStr1[matched] == sStr2[matched])
    {
        matched++;
    }
    return matched;
}
/// <summary>
/// Tells whether a group condition of the given length is at least as long as the
/// ending this rule removes.
/// </summary>
/// <param name="iGroupCondLen">Length of the group's condition.</param>
public bool IsApplicableToGroup(int iGroupCondLen) => iGroupCondLen >= iFrom;

/// <summary>
/// Applies the rule: removes the last iFrom characters of <paramref name="sWord"/>
/// and appends the rule's replacement ending.
/// </summary>
/// <param name="sWord">Word to lemmatize; must be at least iFrom characters long.</param>
public string Lemmatize(string sWord)
{
    string stem = sWord.Substring(0, sWord.Length - iFrom);
    return stem + sTo;
}
#endregion
#region Output Functions (ToString)
/// <summary>Returns the rule as "id:signature".</summary>
public override string ToString() => string.Format("{0}:{1}", iId, sSignature);
#endregion
#region Serialization Functions (Binary)
/// <summary>
/// Writes this rule in the binary format read by Deserialize.
/// </summary>
/// <param name="binWrt">Destination writer.</param>
/// <param name="bThisTopObject">
/// When true, the settings are written after the value fields; otherwise the reader
/// must supply them.
/// </param>
public void Serialize(BinaryWriter binWrt, bool bThisTopObject)
{
    // metadata
    binWrt.Write(bThisTopObject);

    // value types
    binWrt.Write(iId);
    binWrt.Write(iFrom);

    // The replaced ending is optional: a presence flag, then the value if present.
    bool hasFrom = sFrom != null;
    binWrt.Write(hasFrom);
    if (hasFrom)
    {
        binWrt.Write(sFrom);
    }

    binWrt.Write(sTo);
    binWrt.Write(sSignature);

    if (bThisTopObject)
    {
        lsett.Serialize(binWrt);
    }
}
/// <summary>
/// Replaces the content of this rule with data read in the format written by Serialize.
/// </summary>
/// <param name="binRead">Source reader.</param>
/// <param name="lsett">Settings to use when the stream was not written as a top object.</param>
public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett)
{
    // metadata
    bool isTopObject = binRead.ReadBoolean();

    // value types
    iId = binRead.ReadInt32();
    iFrom = binRead.ReadInt32();
    sFrom = binRead.ReadBoolean() ? binRead.ReadString() : null;
    sTo = binRead.ReadString();
    sSignature = binRead.ReadString();

    // reference types: own settings if stored, otherwise the caller's
    this.lsett = isTopObject ? new LemmatizerSettings(binRead) : lsett;
}

/// <summary>Creates a rule by deserializing it from <paramref name="binRead"/>.</summary>
/// <param name="binRead">Source reader.</param>
/// <param name="lsett">Settings to use when the stream does not carry its own.</param>
public LemmaRule(System.IO.BinaryReader binRead, LemmatizerSettings lsett)
{
    Deserialize(binRead, lsett);
}
#endregion
#region Serialization Functions (Latino)
#if LATINO
// Everything in this section is compiled only when the LATINO symbol is defined.
// The three members mirror Serialize / Deserialize / the BinaryReader constructor,
// using Latino.BinarySerializer instead of BinaryWriter / BinaryReader.

// Writes this rule: top-object flag, id, removed-ending length, optional removed
// ending (presence flag + value), replacement ending, signature, and - for a top
// object - the settings.
public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject) {
//save metadata
binWrt.WriteBool(bThisTopObject);
//save value types --------------------------------------
binWrt.WriteInt(iId);
binWrt.WriteInt(iFrom);
if (sFrom == null)
binWrt.WriteBool(false);
else {
binWrt.WriteBool(true);
binWrt.WriteString(sFrom);
}
binWrt.WriteString(sTo);
binWrt.WriteString(sSignature);
if (bThisTopObject)
lsett.Save(binWrt);
}
// Reads the data written by Save. The lsett argument is used only when the stream
// was not written as a top object; otherwise the settings are read from the stream.
public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
//load metadata
bool bThisTopObject = binRead.ReadBool();
//load value types --------------------------------------
iId = binRead.ReadInt();
iFrom = binRead.ReadInt();
if (binRead.ReadBool())
sFrom = binRead.ReadString();
else
sFrom = null;
sTo = binRead.ReadString();
sSignature = binRead.ReadString();
//load reference types if needed -------------------------
if (bThisTopObject)
this.lsett = new LemmatizerSettings(binRead);
else
this.lsett = lsett;
}
// Creates a rule by loading it from a Latino serializer.
public LemmaRule(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
Load(binRead, lsett);
}
#endif
#endregion
}
}

@ -1,478 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace LemmaSharp
{
[Serializable]
public class LemmaTreeNode : ILemmatizerModel
{
#region Private Variables
//settings
private LemmatizerSettings lsett;
//tree structure references
//child nodes keyed by a single character; stays null while the node has no children
private Dictionary<char, LemmaTreeNode> dictSubNodes;
//parent of this node; null for the root
private LemmaTreeNode ltnParentNode;
//essential node properties
private int iSimilarity; //similarity among all words in this node
private string sCondition; //suffix that must match in order to lemmatize
private bool bWholeWord; //true if condition has to match to whole word
//rules and weights;
private LemmaRule lrBestRule; //the best rule to be applied when lemmatizing
private RuleWeighted[] aBestRules; //list of best rules
private double dWeight; //sum of the weights of the examples in [iStart, iEnd]
//source of this node
private int iStart; //index in elExamples of the first example of this node
private int iEnd; //index in elExamples of the last example of this node
private ExampleList elExamples; //example list that iStart and iEnd refer to
#endregion
#region Constructor(s) & Destructor(s)
/// <summary>
/// Creates a node that only knows its settings; all other state is filled in by the caller.
/// </summary>
private LemmaTreeNode(LemmatizerSettings lsett) => this.lsett = lsett;
/// <summary>
/// Builds the root node of a tree covering every example in the list (indexes 0 to Count - 1).
/// </summary>
/// <param name="lsett">Settings used while building the tree.</param>
/// <param name="elExamples">Examples the tree is built from.</param>
public LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples)
    : this(lsett, elExamples, 0, elExamples.Count - 1, null) { }
/// <summary>
/// Builds the node for the examples in the index range [iStart, iEnd]: derives the condition,
/// selects the best rules, builds the sub-nodes and finally collapses some single-child sub-nodes.
/// </summary>
/// <param name="lsett">Settings stored on the node.</param>
/// <param name="elExamples">Example list that <paramref name="iStart"/> and <paramref name="iEnd"/> index into.</param>
/// <param name="iStart">Index of the first word of the current group</param>
/// <param name="iEnd">Index of the last word of the current group</param>
/// <param name="ltnParentNode">Parent of the new node, or null for the root.</param>
private LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples, int iStart, int iEnd, LemmaTreeNode ltnParentNode) : this(lsett)
{
this.ltnParentNode = ltnParentNode;
this.dictSubNodes = null;
this.iStart = iStart;
this.iEnd = iEnd;
this.elExamples = elExamples;
//empty or invalid range: use the default rule with weight 0 (sCondition stays null in this case)
if (iStart >= elExamples.Count || iEnd >= elExamples.Count || iStart > iEnd)
{
lrBestRule = elExamples.Rules.DefaultRule;
aBestRules = new RuleWeighted[1];
aBestRules[0] = new RuleWeighted(lrBestRule, 0);
dWeight = 0;
return;
}
//condition length: 0 for the root, otherwise parent's similarity + 1, capped by the length of the first word
int iConditionLength = Math.Min(ltnParentNode == null ? 0 : ltnParentNode.iSimilarity + 1, elExamples[iStart].Word.Length);
//the condition is the trailing iConditionLength characters of the first word
this.sCondition = elExamples[iStart].Word.Substring(elExamples[iStart].Word.Length - iConditionLength);
//similarity of the node is measured between its first and its last example
this.iSimilarity = elExamples[iStart].Similarity(elExamples[iEnd]);
//whole-word match is required when the last word is exactly as long as the parent's similarity
this.bWholeWord = ltnParentNode == null ? false : elExamples[iEnd].Word.Length == ltnParentNode.iSimilarity;
//rules must be chosen before sub-nodes are built, because AddSubAll compares against lrBestRule
FindBestRules();
AddSubAll();
//TODO check this heuristics, can be problematic when there are more applicable rules
//collapse step: a child that has exactly one child of its own and the same best rule as this node
//is replaced by that grandchild
if (dictSubNodes != null)
{
//replacements are collected first, because the dictionary cannot be changed while it is enumerated
var lReplaceNodes = new List<KeyValuePair<char, LemmaTreeNode>>();
foreach (var kvpChild in dictSubNodes)
if (kvpChild.Value.dictSubNodes != null && kvpChild.Value.dictSubNodes.Count == 1)
{
//fetch the only grandchild
var enumChildChild = kvpChild.Value.dictSubNodes.Values.GetEnumerator();
enumChildChild.MoveNext();
var ltrChildChild = enumChildChild.Current;
if (kvpChild.Value.lrBestRule == lrBestRule)
lReplaceNodes.Add(new KeyValuePair<char, LemmaTreeNode>(kvpChild.Key, ltrChildChild));
}
foreach (var kvpChild in lReplaceNodes)
{
//the grandchild takes the child's key and is re-parented to this node
dictSubNodes[kvpChild.Key] = kvpChild.Value;
kvpChild.Value.ltnParentNode = this;
}
}
}
#endregion
#region Public Properties
/// <summary>
/// Number of nodes in the subtree rooted at this node, this node included.
/// </summary>
public int TreeSize
{
    get
    {
        if (dictSubNodes == null)
            return 1;
        var iTotal = 1;
        foreach (var kvpSub in dictSubNodes)
            iTotal += kvpSub.Value.TreeSize;
        return iTotal;
    }
}
/// <summary>
/// Total weight stored for this node.
/// </summary>
public double Weight => dWeight;
#endregion
#region Essential Class Functions (building model)
/// <summary>
/// Computes the total weight of the node's examples and ranks the rules applicable to the
/// node's condition; the result is stored in <c>dWeight</c>, <c>aBestRules</c> and <c>lrBestRule</c>.
/// </summary>
/// <remarks>
/// When none of the examples' rules is applicable, the condition is lengthened by one character
/// per pass (up to <c>iSimilarity</c>). After that the parent's best rule is used with weight 0,
/// or the default rule when the node has no parent.
/// A LINQ (GroupBy/Aggregate/OrderBy) variant of this method was tried by the original author
/// and found to be slower than this loop.
/// </remarks>
private void FindBestRules()
{
    //sum of example weights per applicable rule
    var dictApplicableRules = new Dictionary<LemmaRule, double>();
    //dictApplicableRules.Add(elExamples.Rules.DefaultRule, 0);
    while (dictApplicableRules.Count == 0)
    {
        //restart the total on every pass: a pass is repeated after the condition is lengthened,
        //and without the reset every example would be counted once per pass
        dWeight = 0;
        for (var iExm = iStart; iExm <= iEnd; iExm++)
        {
            var lr = elExamples[iExm].Rule;
            var dExmWeight = elExamples[iExm].Weight;
            dWeight += dExmWeight;
            if (!lr.IsApplicableToGroup(sCondition.Length))
                continue;
            //single lookup; dSoFar is 0 when the rule has not been seen yet
            double dSoFar;
            dictApplicableRules.TryGetValue(lr, out dSoFar);
            dictApplicableRules[lr] = dSoFar + dExmWeight;
        }
        //if none found then increase condition length or add some default applicable rule
        if (dictApplicableRules.Count == 0)
        {
            if (this.sCondition.Length < iSimilarity)
            {
                var sFirstWord = elExamples[iStart].Word;
                this.sCondition = sFirstWord.Substring(sFirstWord.Length - (sCondition.Length + 1));
            }
            else
            {
                //TODO check this heuristic, maybe it is better to add the default rule instead of the parent's rule
                //a root node has no parent, so it falls back to the default rule instead of failing
                var lrFallback = ltnParentNode != null ? ltnParentNode.lrBestRule : elExamples.Rules.DefaultRule;
                dictApplicableRules.Add(lrFallback, 0);
            }
        }
    }
    //TODO can optimize this step using sorted list (dont add if it's worse than the worst)
    //weights are normalized by the node's total weight before sorting
    var lSortedRules = new List<RuleWeighted>(dictApplicableRules.Count);
    foreach (var kvp in dictApplicableRules)
    {
        lSortedRules.Add(new RuleWeighted(kvp.Key, kvp.Value / dWeight));
    }
    lSortedRules.Sort();
    //keep just best iMaxRulesPerNode rules (zero or less means keep all)
    var iNumRules = lSortedRules.Count;
    if (lsett.iMaxRulesPerNode > 0)
        iNumRules = Math.Min(lSortedRules.Count, lsett.iMaxRulesPerNode);
    aBestRules = lSortedRules.GetRange(0, iNumRules).ToArray();
    //set best rule
    lrBestRule = aBestRules[0].Rule;
    //TODO must check if this heuristic is OK (to privilege parent rule)
    //among the rules tied with the top weight, prefer the parent's best rule
    if (ltnParentNode != null)
    {
        for (int iRule = 0; iRule < lSortedRules.Count &&
            lSortedRules[iRule].Weight == lSortedRules[0].Weight; iRule++)
        {
            if (lSortedRules[iRule].Rule == ltnParentNode.lrBestRule)
            {
                lrBestRule = lSortedRules[iRule].Rule;
                break;
            }
        }
    }
}
/// <summary>
/// Splits the node's examples into runs that share the character just before the common
/// suffix of length <c>iSimilarity</c>, and creates a sub-node for each run that contains
/// an example whose rule differs from this node's best rule.
/// </summary>
/// <remarks>
/// No sub-node is created for the trailing run when all examples fall into a single run.
/// </remarks>
private void AddSubAll()
{
    var iGroupFirst = iStart;
    var chPrevious = '\0';
    var bGroupDiffers = false;
    for (var iExm = iStart; iExm <= iEnd; iExm++)
    {
        var sCurrent = elExamples[iExm].Word;
        //'\0' stands for "word has no character before the shared suffix"
        var chCurrent = '\0';
        if (sCurrent.Length > iSimilarity)
            chCurrent = sCurrent[sCurrent.Length - 1 - iSimilarity];
        var bNewGroup = iExm != iStart && chCurrent != chPrevious;
        if (bNewGroup)
        {
            //close the previous run, emitting it only when it needs its own node
            if (bGroupDiffers)
            {
                AddSub(iGroupFirst, iExm - 1, chPrevious);
                bGroupDiffers = false;
            }
            iGroupFirst = iExm;
        }
        //TODO check out the flag when there are multiple possible rules (not just lrBestRule)
        if (elExamples[iExm].Rule != lrBestRule)
            bGroupDiffers = true;
        chPrevious = chCurrent;
    }
    //trailing run
    if (iGroupFirst != iStart && bGroupDiffers)
        AddSub(iGroupFirst, iEnd, chPrevious);
}
/// <summary>
/// Builds a sub-node for the examples in [iStart, iEnd] and registers it under
/// <paramref name="chChar"/>, unless it is a childless node with the same best rule as this node.
/// </summary>
/// <param name="iStart">Index of the first example of the sub-node.</param>
/// <param name="iEnd">Index of the last example of the sub-node.</param>
/// <param name="chChar">Dictionary key of the sub-node.</param>
private void AddSub(int iStart, int iEnd, char chChar)
{
    var ltnChild = new LemmaTreeNode(lsett, elExamples, iStart, iEnd, this);
    //TODO - maybe not really appropriate because statistics from multiple possible rules are lost
    var bRedundant = ltnChild.lrBestRule == lrBestRule && ltnChild.dictSubNodes == null;
    if (!bRedundant)
    {
        //the dictionary is created on first use
        if (dictSubNodes == null)
            dictSubNodes = new Dictionary<char, LemmaTreeNode>();
        dictSubNodes.Add(chChar, ltnChild);
    }
}
#endregion
#region Essential Class Functions (running model = lemmatizing)
/// <summary>
/// Checks whether <paramref name="sWord"/> satisfies this node's condition.
/// </summary>
/// <remarks>
/// For a node with a parent only the leading part of the condition is compared: the trailing
/// characters covered by the parent's condition, plus one more, are skipped (presumably because
/// the caller has already matched them — TODO confirm). A node without a parent compares the
/// whole condition, i.e. the word must end with it.
/// </remarks>
/// <param name="sWord">Word to test.</param>
/// <returns>True when the compared characters match and, for whole-word nodes, the lengths are equal.</returns>
public bool ConditionSatisfied(string sWord)
{
    //if (bWholeWord)
    //    return sWord == sCondition;
    //else
    //    return sWord.EndsWith(sCondition);
    var iDiff = sWord.Length - sCondition.Length;
    if (iDiff < 0 || (bWholeWord && iDiff > 0))
        return false;
    //the root has no parent; dereferencing it here used to throw a NullReferenceException
    var iWrdEnd = ltnParentNode == null
        ? sCondition.Length
        : sCondition.Length - ltnParentNode.sCondition.Length - 1;
    for (var iChar = 0; iChar < iWrdEnd; iChar++)
    {
        if (sCondition[iChar] != sWord[iChar + iDiff])
            return false;
    }
    return true;
}
/// <summary>
/// Lemmatizes a word by descending into the matching sub-node when there is one,
/// otherwise by applying this node's best rule.
/// </summary>
/// <param name="sWord">Word to lemmatize.</param>
/// <returns>The result of the selected rule's <c>Lemmatize</c>.</returns>
public string Lemmatize(string sWord)
{
    if (sWord.Length >= iSimilarity && dictSubNodes != null)
    {
        //key is the character just before the shared suffix, or '\0' when there is none
        var chKey = '\0';
        if (sWord.Length > iSimilarity)
            chKey = sWord[sWord.Length - 1 - iSimilarity];
        LemmaTreeNode ltnSub;
        if (dictSubNodes.TryGetValue(chKey, out ltnSub) && ltnSub.ConditionSatisfied(sWord))
            return ltnSub.Lemmatize(sWord);
    }
    return lrBestRule.Lemmatize(sWord);
}
#endregion
#region Output Functions (ToString)
/// <summary>
/// Returns a multi-line dump of this node and all of its descendants.
/// </summary>
public override string ToString()
{
    var sbTree = new StringBuilder();
    this.ToString(sbTree, 0);
    return sbTree.ToString();
}
/// <summary>
/// Appends one line describing this node, indented by <paramref name="iLevel"/> tabs,
/// followed by the lines of all sub-nodes one level deeper.
/// </summary>
/// <param name="sb">Builder that receives the text.</param>
/// <param name="iLevel">Depth of this node, used as the number of leading tabs.</param>
private void ToString(StringBuilder sb, int iLevel)
{
    var bHasRules = aBestRules != null;
    sb.Append('\t', iLevel)
      .AppendFormat("Suffix=\"{0}{1}\"; ", bWholeWord ? "^" : string.Empty, sCondition)
      .AppendFormat("Rule=\"{0}\"; ", lrBestRule)
      .AppendFormat("Weight=\"{0}\"; ", dWeight);
    //cover is the weight of the top-ranked rule
    if (bHasRules && aBestRules.Length > 0)
        sb.AppendFormat("Cover={0}; ", aBestRules[0].Weight);
    sb.Append("Rulles=");
    if (bHasRules)
    {
        for (var iRule = 0; iRule < aBestRules.Length; iRule++)
            sb.AppendFormat(" {0}", aBestRules[iRule]);
    }
    sb.Append("; ").AppendLine();
    if (dictSubNodes == null)
        return;
    foreach (var kvpSub in dictSubNodes)
        kvpSub.Value.ToString(sb, iLevel + 1);
}
#endregion
#region Serialization Functions (Binary)
/// <summary>
/// Writes this node and, recursively, all of its sub-nodes to a binary writer.
/// </summary>
/// <remarks>
/// Layout: sub-node flag, [sub-node count, (key, sub-node)*], similarity, condition,
/// whole-word flag, best rule signature, best rule count, (signature, weight)*,
/// node weight, start index, end index. Rules are stored by signature only.
/// </remarks>
/// <param name="binWrt">Writer to write to.</param>
public void Serialize(BinaryWriter binWrt)
{
    binWrt.Write(dictSubNodes != null);
    if (dictSubNodes != null)
    {
        binWrt.Write(dictSubNodes.Count);
        foreach (var kvp in dictSubNodes)
        {
            binWrt.Write(kvp.Key);
            kvp.Value.Serialize(binWrt);
        }
    }
    binWrt.Write(iSimilarity);
    //a node built from an empty example range never gets a condition, and
    //BinaryWriter.Write(string) throws on null, so an empty string is written instead
    binWrt.Write(sCondition ?? string.Empty);
    binWrt.Write(bWholeWord);
    binWrt.Write(lrBestRule.Signature);
    binWrt.Write(aBestRules.Length);
    for (var i = 0; i < aBestRules.Length; i++)
    {
        binWrt.Write(aBestRules[i].Rule.Signature);
        binWrt.Write(aBestRules[i].Weight);
    }
    binWrt.Write(dWeight);
    binWrt.Write(iStart);
    binWrt.Write(iEnd);
}
/// <summary>
/// Reads this node and, recursively, its sub-nodes from a binary reader, in the order
/// written by <c>Serialize</c>.
/// </summary>
/// <param name="binRead">Reader positioned at the start of a serialized node.</param>
/// <param name="lsett">Settings stored on this node and passed to its sub-nodes.</param>
/// <param name="elExamples">Example list whose rule list resolves the stored rule signatures.</param>
/// <param name="ltnParentNode">Parent of this node, or null for the root.</param>
public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
{
    this.lsett = lsett;
    //sub-nodes come first in the stream
    var bHasSubNodes = binRead.ReadBoolean();
    if (!bHasSubNodes)
    {
        dictSubNodes = null;
    }
    else
    {
        dictSubNodes = new Dictionary<char, LemmaTreeNode>();
        var iSubNodes = binRead.ReadInt32();
        for (var iSub = 0; iSub < iSubNodes; iSub++)
        {
            var chKey = binRead.ReadChar();
            dictSubNodes.Add(chKey, new LemmaTreeNode(binRead, this.lsett, elExamples, this));
        }
    }
    this.ltnParentNode = ltnParentNode;
    iSimilarity = binRead.ReadInt32();
    sCondition = binRead.ReadString();
    bWholeWord = binRead.ReadBoolean();
    //rules are stored as signatures and looked up in the rule list
    lrBestRule = elExamples.Rules[binRead.ReadString()];
    var iBestRules = binRead.ReadInt32();
    aBestRules = new RuleWeighted[iBestRules];
    for (var iRule = 0; iRule < iBestRules; iRule++)
    {
        var lrRule = elExamples.Rules[binRead.ReadString()];
        var dRuleWeight = binRead.ReadDouble();
        aBestRules[iRule] = new RuleWeighted(lrRule, dRuleWeight);
    }
    dWeight = binRead.ReadDouble();
    iStart = binRead.ReadInt32();
    iEnd = binRead.ReadInt32();
    this.elExamples = elExamples;
}
/// <summary>
/// Creates a node by reading it (and its sub-nodes) from a binary reader.
/// </summary>
public LemmaTreeNode(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
{
    this.Deserialize(binRead, lsett, elExamples, ltnParentNode);
}
#endregion
#region Serialization Functions (Latino)
#if LATINO
/// <summary>
/// Writes this node and, recursively, its sub-nodes to a Latino binary serializer.
/// </summary>
/// <param name="binWrt">Serializer to write to.</param>
public void Save(Latino.BinarySerializer binWrt) {
    bool bHasSubNodes = dictSubNodes != null;
    binWrt.WriteBool(bHasSubNodes);
    if (bHasSubNodes) {
        binWrt.WriteInt(dictSubNodes.Count);
        foreach (var kvpSub in dictSubNodes) {
            binWrt.WriteChar(kvpSub.Key);
            kvpSub.Value.Save(binWrt);
        }
    }
    binWrt.WriteInt(iSimilarity);
    binWrt.WriteString(sCondition);
    binWrt.WriteBool(bWholeWord);
    // rules are stored by signature
    binWrt.WriteString(lrBestRule.Signature);
    binWrt.WriteInt(aBestRules.Length);
    foreach (RuleWeighted rw in aBestRules) {
        binWrt.WriteString(rw.Rule.Signature);
        binWrt.WriteDouble(rw.Weight);
    }
    binWrt.WriteDouble(dWeight);
    binWrt.WriteInt(iStart);
    binWrt.WriteInt(iEnd);
}
/// <summary>
/// Reads this node and, recursively, its sub-nodes from a Latino binary serializer,
/// in the order written by <c>Save</c>.
/// </summary>
/// <param name="binRead">Serializer to read from.</param>
/// <param name="lsett">Settings stored on this node and passed to its sub-nodes.</param>
/// <param name="elExamples">Example list whose rule list resolves the stored rule signatures.</param>
/// <param name="ltnParentNode">Parent of this node, or null for the root.</param>
public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode) {
    this.lsett = lsett;
    bool bHasSubNodes = binRead.ReadBool();
    if (!bHasSubNodes) {
        dictSubNodes = null;
    }
    else {
        dictSubNodes = new Dictionary<char, LemmaTreeNode>();
        int iSubNodes = binRead.ReadInt();
        for (int iSub = 0; iSub < iSubNodes; iSub++) {
            char chKey = binRead.ReadChar();
            dictSubNodes.Add(chKey, new LemmaTreeNode(binRead, this.lsett, elExamples, this));
        }
    }
    this.ltnParentNode = ltnParentNode;
    iSimilarity = binRead.ReadInt();
    sCondition = binRead.ReadString();
    bWholeWord = binRead.ReadBool();
    // rules are stored by signature and looked up in the rule list
    lrBestRule = elExamples.Rules[binRead.ReadString()];
    int iBestRules = binRead.ReadInt();
    aBestRules = new RuleWeighted[iBestRules];
    for (int iRule = 0; iRule < iBestRules; iRule++) {
        LemmaRule lrRule = elExamples.Rules[binRead.ReadString()];
        double dRuleWeight = binRead.ReadDouble();
        aBestRules[iRule] = new RuleWeighted(lrRule, dRuleWeight);
    }
    dWeight = binRead.ReadDouble();
    iStart = binRead.ReadInt();
    iEnd = binRead.ReadInt();
    this.elExamples = elExamples;
}
/// <summary>
/// Creates a node by loading it (and its sub-nodes) from a Latino binary serializer.
/// </summary>
public LemmaTreeNode(Latino.BinarySerializer binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
{
    this.Load(binRead, lsett, elExamples, ltnParentNode);
}
#endif
#endregion
#region Other (Temporarly)
//TODO - this is a temporary function, remove it
/// <summary>
/// Verifies, for the whole subtree, that every sub-node's condition ends with the
/// condition of its parent.
/// </summary>
/// <returns>True when the check holds for every node that was visited.</returns>
public bool CheckConsistency()
{
    if (dictSubNodes == null)
        return true;
    foreach (var kvpSub in dictSubNodes)
    {
        var ltnSub = kvpSub.Value;
        //stop at the first failure; nothing after it can change the result
        if (!ltnSub.CheckConsistency())
            return false;
        if (!ltnSub.sCondition.EndsWith(sCondition))
            return false;
    }
    return true;
}
#endregion
}
}

@ -1,465 +0,0 @@
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Runtime.Serialization;
using System.IO.Compression;
using SevenZip;
namespace LemmaSharp
{
[Serializable]
public class Lemmatizer : ITrainableLemmatizer
#if LATINO
, Latino.ISerializable
#endif
{
#region Private Variables
//settings used by the example list and the tree nodes
protected LemmatizerSettings lsett;
//training examples the model is built from
protected ExampleList elExamples;
//root of the built model; null means the model has to be (re)built
protected LemmaTreeNode ltnRootNode;
//root of the front model; only built when lsett.bBuildFrontLemmatizer is set
protected LemmaTreeNode ltnRootNodeFront;
#endregion
#region Constructor(s)
/// <summary>
/// Creates a lemmatizer with default settings and an empty example list.
/// </summary>
public Lemmatizer()
    : this(new LemmatizerSettings())
{
}
/// <summary>
/// Creates a lemmatizer with the given settings, an empty example list and no built model.
/// </summary>
/// <param name="lsett">Settings stored by reference and passed to the example list.</param>
public Lemmatizer(LemmatizerSettings lsett)
{
    //no model yet; it is built on demand
    ltnRootNode = ltnRootNodeFront = null;
    this.lsett = lsett;
    elExamples = new ExampleList(lsett);
}
/// <summary>
/// Creates a lemmatizer with the given settings and loads examples from a multext file.
/// </summary>
/// <param name="srIn">Reader over the multext data.</param>
/// <param name="sFormat">Format string passed on to <c>AddMultextFile</c>.</param>
/// <param name="lsett">Settings of the new lemmatizer.</param>
public Lemmatizer(StreamReader srIn, string sFormat, LemmatizerSettings lsett)
    : this(lsett)
{
    this.AddMultextFile(srIn, sFormat);
}
#endregion
#region Private Properties
/// <summary>
/// Root node of the model; the model is built first when it does not exist yet.
/// </summary>
private LemmaTreeNode ltrRootNodeSafe
{
    get
    {
        var bMissing = ltnRootNode == null;
        if (bMissing)
        {
            BuildModel();
        }
        return ltnRootNode;
    }
}
/// <summary>
/// Root node of the front model; when it is missing and the settings ask for a front
/// lemmatizer, <c>BuildModel</c> is called first. May return null.
/// </summary>
private LemmaTreeNode ltrRootNodeFrontSafe
{
    get
    {
        var bMissing = ltnRootNodeFront == null;
        if (bMissing && lsett.bBuildFrontLemmatizer)
        {
            BuildModel();
        }
        return ltnRootNodeFront;
    }
}
#endregion
#region Public Properties
/// <summary>
/// A deep copy of the lemmatizer's settings; changing the copy does not affect the lemmatizer.
/// </summary>
public LemmatizerSettings Settings => lsett.CloneDeep();
/// <summary>
/// The example list held by this lemmatizer (the same instance, not a copy).
/// </summary>
public ExampleList Examples => elExamples;
/// <summary>
/// The rule list of the lemmatizer's example list.
/// </summary>
public RuleList Rules => elExamples.Rules;
/// <summary>
/// Root node of the model; accessing it builds the model when necessary.
/// </summary>
public LemmaTreeNode RootNode => ltrRootNodeSafe;
/// <summary>
/// Root node of the front model; accessing it may build the model. May be null.
/// </summary>
public LemmaTreeNode RootNodeFront => ltrRootNodeFrontSafe;
/// <summary>
/// The model as an <see cref="ILemmatizerModel"/>; accessing it builds the model when necessary.
/// </summary>
public ILemmatizerModel Model => ltrRootNodeSafe;
#endregion
#region Essential Class Functions (adding examples to repository)
/// <summary>
/// Adds the examples of a multext file to the example list and invalidates the built model.
/// </summary>
/// <param name="srIn">Reader over the multext data.</param>
/// <param name="sFormat">Format string passed on to the example list.</param>
public void AddMultextFile(StreamReader srIn, string sFormat)
{
    this.elExamples.AddMultextFile(srIn, sFormat);
    //both roots are dropped: Lemmatize reads the front root before the rear one, so a
    //front root left in place would be used once more before the model is rebuilt
    ltnRootNode = null;
    ltnRootNodeFront = null;
}
/// <summary>
/// Adds an example with weight 1 and no msd tag.
/// </summary>
public void AddExample(string sWord, string sLemma) => AddExample(sWord, sLemma, 1, null);
/// <summary>
/// Adds an example with the given weight and no msd tag.
/// </summary>
public void AddExample(string sWord, string sLemma, double dWeight) => AddExample(sWord, sLemma, dWeight, null);
/// <summary>
/// Adds an example to the example list and invalidates the built model.
/// </summary>
/// <param name="sWord">Word form.</param>
/// <param name="sLemma">Lemma of the word.</param>
/// <param name="dWeight">Weight of the example.</param>
/// <param name="sMsd">Msd tag of the example; may be null.</param>
public void AddExample(string sWord, string sLemma, double dWeight, string sMsd)
{
    elExamples.AddExample(sWord, sLemma, dWeight, sMsd);
    //both roots are dropped: Lemmatize reads the front root before the rear one, so a
    //front root left in place would be used once more before the model is rebuilt
    ltnRootNode = null;
    ltnRootNodeFront = null;
}
/// <summary>
/// Forwards to <c>DropExamples</c> of the example list; the built model is left as it is.
/// </summary>
public void DropExamples() => elExamples.DropExamples();
/// <summary>
/// Forwards to <c>FinalizeAdditions</c> of the example list.
/// </summary>
public void FinalizeAdditions() => elExamples.FinalizeAdditions();
#endregion
#region Essential Class Functions (building model & lemmatizing)
/// <summary>
/// Builds the model from the current examples; does nothing when a model already exists.
/// </summary>
/// <remarks>
/// With <c>bBuildFrontLemmatizer</c> set, two trees are built from the rear and front example
/// lists; otherwise the additions are finalized and a single tree is built.
/// </remarks>
public void BuildModel()
{
    if (ltnRootNode != null)
        return;
    if (lsett.bBuildFrontLemmatizer)
    {
        ltnRootNode = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(false));
        ltnRootNodeFront = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(true));
        return;
    }
    //TODO remove: elExamples.FinalizeAdditions();
    elExamples.FinalizeAdditions();
    ltnRootNode = new LemmaTreeNode(lsett, elExamples);
}
/// <summary>
/// Lemmatizes a word, building the model first when necessary.
/// </summary>
/// <remarks>
/// With <c>bBuildFrontLemmatizer</c> set, the reversed word is first run through the front
/// model, the result is reversed back and then run through the main model.
/// </remarks>
/// <param name="sWord">Word to lemmatize.</param>
/// <returns>The lemma produced by the model.</returns>
public string Lemmatize(string sWord)
{
    var sInput = sWord;
    if (lsett.bBuildFrontLemmatizer)
    {
        var sReversed = LemmaExample.StringReverse(sWord);
        var sFrontLemma = ltrRootNodeFrontSafe.Lemmatize(sReversed);
        sInput = LemmaExample.StringReverse(sFrontLemma);
    }
    return ltrRootNodeSafe.Lemmatize(sInput);
}
#endregion
#region Serialization Functions (ISerializable)
/// <summary>
/// Stores the settings and the example list for runtime serialization; the built model
/// itself is not stored.
/// </summary>
/// <param name="info">Serialization store to fill.</param>
/// <param name="context">Not used.</param>
public void GetObjectData(SerializationInfo info, StreamingContext context)
{
    info.AddValue("lsett", this.lsett);
    info.AddValue("elExamples", this.elExamples);
}
/// <summary>
/// Deserialization constructor: restores settings and examples, then rebuilds the model.
/// </summary>
/// <param name="info">Serialization store written by <c>GetObjectData</c>.</param>
/// <param name="context">Not used.</param>
public Lemmatizer(SerializationInfo info, StreamingContext context)
    : this()
{
    var oSettings = info.GetValue("lsett", typeof(LemmatizerSettings));
    lsett = (LemmatizerSettings)oSettings;
    var oExamples = info.GetValue("elExamples", typeof(ExampleList));
    elExamples = (ExampleList)oExamples;
    BuildModel();
}
#endregion
#region Serialization Functions (Binary)
/// <summary>
/// Writes the settings, the example data and the built model to a binary writer.
/// </summary>
/// <remarks>
/// The model is built first when it does not exist (for example after examples were added),
/// so that a missing model cannot fail halfway through writing.
/// When <paramref name="bSerializeExamples"/> is false, the rear and front example lists
/// are written as well, in that order; <c>Deserialize</c> reads them back in the same order.
/// </remarks>
/// <param name="binWrt">Writer to write to.</param>
/// <param name="bSerializeExamples">Passed on to the example list serialization and stored in the stream.</param>
public void Serialize(BinaryWriter binWrt, bool bSerializeExamples)
{
    //no-op when the model already exists
    BuildModel();
    lsett.Serialize(binWrt);
    binWrt.Write(bSerializeExamples);
    elExamples.Serialize(binWrt, bSerializeExamples, false);
    if (!bSerializeExamples)
    {
        elExamples.GetFrontRearExampleList(false).Serialize(binWrt, bSerializeExamples, false);
        elExamples.GetFrontRearExampleList(true).Serialize(binWrt, bSerializeExamples, false);
    }
    ltnRootNode.Serialize(binWrt);
    if (lsett.bBuildFrontLemmatizer)
        ltnRootNodeFront.Serialize(binWrt);
}
/// <summary>
/// Reads settings, example data and the model from a binary reader, in the order written by
/// <c>Serialize(BinaryWriter, bool)</c>.
/// </summary>
/// <param name="binRead">Reader positioned just after the compression marker byte.</param>
public void Deserialize(BinaryReader binRead)
{
    lsett = new LemmatizerSettings(binRead);
    var bExamplesIncluded = binRead.ReadBoolean();
    elExamples = new ExampleList(binRead, lsett);
    ExampleList elRear;
    ExampleList elFront;
    if (!bExamplesIncluded)
    {
        //the rear and front lists were written to the stream explicitly
        elRear = new ExampleList(binRead, lsett);
        elFront = new ExampleList(binRead, lsett);
    }
    else
    {
        //the lists are derived from the examples that were just read
        elRear = elExamples.GetFrontRearExampleList(false);
        elFront = elExamples.GetFrontRearExampleList(true);
    }
    if (lsett.bBuildFrontLemmatizer)
    {
        ltnRootNode = new LemmaTreeNode(binRead, lsett, elRear, null);
        ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, elFront, null);
    }
    else
    {
        ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null);
    }
}
/// <summary>
/// Compression applied to a serialized lemmatizer. The numeric value is written as the first
/// byte of the stream, so existing values must never change; new algorithms get new values,
/// otherwise old files can no longer be loaded.
/// </summary>
public enum Compression
{
    /// <summary>Data is written without compression.</summary>
    None = 0,
    /// <summary>Data is written through a deflate stream.</summary>
    Deflate = 1,
    /// <summary>Data is written through the LZMA encoder.</summary>
    LZMA = 2
}
/// <summary>
/// Creates a lemmatizer from a binary reader positioned at the compression marker byte.
/// Only uncompressed data can be read this way.
/// </summary>
/// <param name="binRead">Reader over data written with <see cref="Compression.None"/>.</param>
/// <exception cref="NotSupportedException">The marker byte indicates compressed data.</exception>
public Lemmatizer(BinaryReader binRead)
{
    var compr = (Compression)binRead.ReadByte();
    //the reader cannot be re-wrapped in a decompressing stream, so compressed data is rejected
    if (compr != Compression.None)
        throw new NotSupportedException("Loading lemmatizer with binary reader on compressed stream is not supported.");
    Deserialize(binRead);
}
/// <summary>
/// Creates a lemmatizer from a stream written by one of the <c>Serialize(Stream, …)</c> overloads.
/// </summary>
/// <param name="streamIn">Stream positioned at the compression marker byte.</param>
public Lemmatizer(Stream streamIn)
{
    this.Deserialize(streamIn);
}
/// <summary>
/// Writes the lemmatizer, including its examples, without compression.
/// </summary>
public void Serialize(Stream streamOut) => Serialize(streamOut, true, Compression.None);
/// <summary>
/// Writes the lemmatizer without compression.
/// </summary>
/// <param name="streamOut">Stream to write to.</param>
/// <param name="bSerializeExamples">Passed on to the example list serialization.</param>
public void Serialize(Stream streamOut, bool bSerializeExamples) => Serialize(streamOut, bSerializeExamples, Compression.None);
/// <summary>
/// Writes a one-byte compression marker followed by the lemmatizer data in the chosen format.
/// </summary>
/// <param name="streamOut">Stream to write to.</param>
/// <param name="bSerializeExamples">Passed on to the example list serialization.</param>
/// <param name="compress">Compression to apply; for a value outside the enum only the marker byte is written.</param>
public void Serialize(Stream streamOut, bool bSerializeExamples, Compression compress)
{
    streamOut.WriteByte((byte)compress);
    if (compress == Compression.None)
    {
        SerializeNone(streamOut, bSerializeExamples);
    }
    else if (compress == Compression.Deflate)
    {
        SerializeDeflate(streamOut, bSerializeExamples);
    }
    else if (compress == Compression.LZMA)
    {
        SerializeLZMA(streamOut, bSerializeExamples);
    }
}
/// <summary>
/// Writes the lemmatizer data to the stream as-is.
/// </summary>
/// <remarks>
/// The writer is created without the leave-open option, so disposing it also closes
/// <paramref name="streamOut"/>.
/// </remarks>
private void SerializeNone(Stream streamOut, bool bSerializeExamples)
{
    using (var binWrtPlain = new BinaryWriter(streamOut))
    {
        Serialize(binWrtPlain, bSerializeExamples);
    }
}
/// <summary>
/// Writes the lemmatizer data through a deflate stream that leaves
/// <paramref name="streamOut"/> open when it is disposed.
/// </summary>
private void SerializeDeflate(Stream streamOut, bool bSerializeExamples)
{
    using (var streamDeflate = new DeflateStream(streamOut, CompressionMode.Compress, true))
    using (var binWrtDeflate = new BinaryWriter(streamDeflate))
    {
        Serialize(binWrtDeflate, bSerializeExamples);
        binWrtDeflate.Flush();
        binWrtDeflate.Close();
    }
}
/// <summary>
/// Serializes the lemmatizer into a temporary memory buffer and writes it to
/// <paramref name="streamOut"/> LZMA-compressed: coder properties first, then the
/// uncompressed length as 8 bytes, then the encoded data.
/// </summary>
/// <param name="streamOut">Stream that receives the compressed data.</param>
/// <param name="bSerializeExamples">Passed on to <c>Serialize(BinaryWriter, bool)</c>.</param>
private void SerializeLZMA(Stream streamOut, bool bSerializeExamples)
{
//property ids and values below are kept in the same order
//NOTE(review): presumably SetCoderProperties pairs the two arrays by index - confirm against the SevenZip SDK
CoderPropID[] propIDs =
{
CoderPropID.DictionarySize,
CoderPropID.PosStateBits,
CoderPropID.LitContextBits,
CoderPropID.LitPosBits,
CoderPropID.Algorithm,
CoderPropID.NumFastBytes,
CoderPropID.MatchFinder,
CoderPropID.EndMarker
};
Int32 dictionary = 1 << 23;
Int32 posStateBits = 2;
Int32 litContextBits = 3; // for normal files
Int32 litPosBits = 0;
Int32 algorithm = 2;
Int32 numFastBytes = 128;
var mf = "bt4";
var eos = false;
object[] properties =
{
(Int32)(dictionary),
(Int32)(posStateBits),
(Int32)(litContextBits),
(Int32)(litPosBits),
(Int32)(algorithm),
(Int32)(numFastBytes),
mf,
eos
};
using (var msTemp = new MemoryStream())
{
using (var binWrtTemp = new BinaryWriter(msTemp))
{
//serialize uncompressed into memory, then rewind so the encoder reads from the start
this.Serialize(binWrtTemp, bSerializeExamples);
msTemp.Position = 0;
var encoder = new SevenZip.Compression.LZMA.Encoder();
encoder.SetCoderProperties(propIDs, properties);
encoder.WriteCoderProperties(streamOut);
//uncompressed length, 8 bytes, least significant byte first (read back by DecompressLZMA)
var fileSize = msTemp.Length;
for (int i = 0; i < 8; i++)
{
streamOut.WriteByte((Byte)(fileSize >> (8 * i)));
}
encoder.Code(msTemp, streamOut, -1, -1, null);
binWrtTemp.Close();
encoder = null;
}
msTemp.Close();
}
}
/// <summary>
/// Reads the compression marker byte from the stream and then loads the lemmatizer from the
/// (decompressed) data that follows.
/// </summary>
/// <remarks>
/// The stream returned by <c>Decompress</c> is disposed afterwards; for uncompressed data that
/// is <paramref name="streamIn"/> itself.
/// </remarks>
/// <param name="streamIn">Stream positioned at the compression marker byte.</param>
public void Deserialize(Stream streamIn)
{
    var iMarker = streamIn.ReadByte();
    using (var streamData = Decompress(streamIn, (Compression)iMarker))
    using (var binReadData = new BinaryReader(streamData))
    {
        Deserialize(binReadData);
    }
}
/// <summary>
/// Returns a stream that yields the uncompressed lemmatizer data.
/// </summary>
/// <param name="streamIn">Stream positioned just after the compression marker byte.</param>
/// <param name="compress">Compression named by the marker byte.</param>
/// <returns>
/// A deflate or LZMA-decoded stream; for <see cref="Compression.None"/> and any unknown value,
/// <paramref name="streamIn"/> itself.
/// </returns>
private Stream Decompress(Stream streamIn, Compression compress)
{
    switch (compress)
    {
        case Compression.Deflate:
            return new DeflateStream(streamIn, CompressionMode.Decompress);
        case Compression.LZMA:
            return DecompressLZMA(streamIn);
        default:
            //None and unknown values: the data is read as it is
            return streamIn;
    }
}
/// <summary>
/// Decodes LZMA data written by <c>SerializeLZMA</c> into a memory stream.
/// </summary>
/// <remarks>
/// Expected layout: 5 bytes of decoder properties, 8 bytes of uncompressed length (least
/// significant byte first), then the encoded data up to the end of the stream. The stream must
/// support <c>Length</c> and <c>Position</c>.
/// </remarks>
/// <param name="streamIn">Stream positioned just after the compression marker byte.</param>
/// <returns>A memory stream with the decoded data, positioned at its start.</returns>
/// <exception cref="EndOfStreamException">The stream ends inside the 13-byte header.</exception>
private Stream DecompressLZMA(Stream streamIn)
{
    var properties = new byte[5];
    //Stream.Read may return fewer bytes than requested without being at the end,
    //so keep reading until the buffer is full or the stream really ends
    var iFilled = 0;
    while (iFilled < properties.Length)
    {
        var iChunk = streamIn.Read(properties, iFilled, properties.Length - iFilled);
        if (iChunk <= 0)
            throw new EndOfStreamException("input .lzma is too short");
        iFilled += iChunk;
    }
    var decoder = new SevenZip.Compression.LZMA.Decoder();
    decoder.SetDecoderProperties(properties);
    //uncompressed length, 8 bytes, least significant byte first
    long outSize = 0;
    for (var i = 0; i < 8; i++)
    {
        var v = streamIn.ReadByte();
        if (v < 0)
            throw new EndOfStreamException("Can't Read 1");
        outSize |= ((long)(byte)v) << (8 * i);
    }
    //everything that is left in the stream is encoded data
    var compressedSize = streamIn.Length - streamIn.Position;
    var outStream = new MemoryStream();
    decoder.Code(streamIn, outStream, compressedSize, outSize, null);
    outStream.Seek(0, SeekOrigin.Begin);
    return outStream;
}
#endregion
#region Serialization Functions (Latino)
#if LATINO
/// <summary>
/// Writes settings, examples and the built model to a Latino binary serializer.
/// </summary>
/// <param name="binWrt">Serializer to write to.</param>
public void Save(Latino.BinarySerializer binWrt) {
    lsett.Save(binWrt);
    elExamples.Save(binWrt, true, false);
    ltnRootNode.Save(binWrt);
    // the front tree is stored only when the settings ask for one
    if (lsett.bBuildFrontLemmatizer) {
        ltnRootNodeFront.Save(binWrt);
    }
}
/// <summary>
/// Reads settings, examples and the model from a Latino binary serializer, in the order
/// written by <c>Save</c>.
/// </summary>
/// <param name="binRead">Serializer to read from.</param>
public void Load(Latino.BinarySerializer binRead) {
    lsett = new LemmatizerSettings(binRead);
    elExamples = new ExampleList(binRead, lsett);
    if (lsett.bBuildFrontLemmatizer) {
        ExampleList elRear = elExamples.GetFrontRearExampleList(false);
        ltnRootNode = new LemmaTreeNode(binRead, lsett, elRear, null);
        ExampleList elFront = elExamples.GetFrontRearExampleList(true);
        ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, elFront, null);
    }
    else {
        ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null);
    }
}
/// <summary>
/// Creates a lemmatizer by loading it from a Latino binary serializer.
/// </summary>
public Lemmatizer(Latino.BinarySerializer binRead)
{
    this.Load(binRead);
}
/// <summary>
/// Saves the lemmatizer to a stream through a Latino binary serializer, which is closed afterwards.
/// </summary>
/// <param name="streamOut">Stream to write to.</param>
public void Save(Stream streamOut)
{
    var binWrtLatino = new Latino.BinarySerializer(streamOut);
    Save(binWrtLatino);
    binWrtLatino.Close();
}
/// <summary>
/// Loads the lemmatizer from a stream through a Latino binary serializer, which is closed afterwards.
/// </summary>
/// <param name="streamIn">Stream to read from.</param>
public void Load(Stream streamIn)
{
    var binReadLatino = new Latino.BinarySerializer(streamIn);
    Load(binReadLatino);
    binReadLatino.Close();
}
/// <summary>
/// Creates a lemmatizer by loading it from a stream in the Latino format.
/// </summary>
/// <param name="streamIn">Stream to read from.</param>
/// <param name="sDummy">Not used; it only distinguishes this overload from the plain stream constructor.</param>
public Lemmatizer(Stream streamIn, string sDummy)
{
    this.Load(streamIn);
}
#endif
#endregion
}
}

@ -1,143 +0,0 @@
using System;
using System.IO;
using System.Runtime.Serialization;
namespace LemmaSharp
{
/// <summary>
/// Settings of the lemmagen algorithm that affect the speed and power of learning and lemmatizing.
/// TODO this class will probably be removed in the future.
/// </summary>
[Serializable]
public class LemmatizerSettings : ISerializable
{
    #region Sub-Structures
    /// <summary>
    /// How the algorithm treats msd tags.
    /// </summary>
    public enum MsdConsideration
    {
        /// <summary>
        /// Msd tags are ignored completely (examples with different tags are joined and their weights summed).
        /// </summary>
        Ignore,
        /// <summary>
        /// Examples that differ only in their msd tag are not considered equal and are not joined.
        /// </summary>
        Distinct,
        /// <summary>
        /// Examples with different tags are joined (all msd tags are concatenated).
        /// </summary>
        JoinAll,
        /// <summary>
        /// Examples with different tags are joined (only distinct msd tags are concatenated - somewhat slower).
        /// </summary>
        JoinDistinct,
        /// <summary>
        /// Examples with different tags are joined (the new tag is the left-to-right substring shared by all joined examples).
        /// </summary>
        JoinSameSubstring
    }
    #endregion

    #region Public Variables
    /// <summary>
    /// True if the from-string is part of the rule identifier ([from]->[to]); false if only its length is used ([#len]->[to]).
    /// </summary>
    public bool bUseFromInRules = true;
    /// <summary>
    /// Specifies how the algorithm treats msd tags.
    /// </summary>
    public MsdConsideration eMsdConsider = MsdConsideration.Distinct;
    /// <summary>
    /// Number of best rules kept in memory for each node. Zero means unlimited.
    /// </summary>
    public int iMaxRulesPerNode = 0;
    /// <summary>
    /// If true, the build process uses a few more heuristics and first builds a left-to-right lemmatizer (it lemmatizes the front of the word).
    /// </summary>
    public bool bBuildFrontLemmatizer = false;
    #endregion

    #region Constructor(s)
    /// <summary>Creates settings with the default values.</summary>
    public LemmatizerSettings()
    {
    }

    /// <summary>Deserialization constructor for runtime serialization.</summary>
    public LemmatizerSettings(SerializationInfo info, StreamingContext context)
    {
        bUseFromInRules = info.GetBoolean("bUseFromInRules");
        var oMsd = info.GetValue("eMsdConsider", typeof(MsdConsideration));
        eMsdConsider = (MsdConsideration)oMsd;
        iMaxRulesPerNode = info.GetInt32("iMaxRulesPerNode");
        bBuildFrontLemmatizer = info.GetBoolean("bBuildFrontLemmatizer");
    }

    /// <summary>Creates settings by reading them from a binary reader.</summary>
    public LemmatizerSettings(System.IO.BinaryReader binRead)
    {
        Deserialize(binRead);
    }
    #endregion

    #region Cloneable functions
    /// <summary>
    /// Returns a new settings object with the same four values.
    /// </summary>
    public LemmatizerSettings CloneDeep()
    {
        var lsettCopy = new LemmatizerSettings();
        lsettCopy.bUseFromInRules = bUseFromInRules;
        lsettCopy.eMsdConsider = eMsdConsider;
        lsettCopy.iMaxRulesPerNode = iMaxRulesPerNode;
        lsettCopy.bBuildFrontLemmatizer = bBuildFrontLemmatizer;
        return lsettCopy;
    }
    #endregion

    #region Serialization Functions (ISerializable)
    /// <summary>Stores the four settings under their field names.</summary>
    public void GetObjectData(SerializationInfo info, StreamingContext context)
    {
        info.AddValue("bUseFromInRules", this.bUseFromInRules);
        info.AddValue("eMsdConsider", this.eMsdConsider);
        info.AddValue("iMaxRulesPerNode", this.iMaxRulesPerNode);
        info.AddValue("bBuildFrontLemmatizer", this.bBuildFrontLemmatizer);
    }
    #endregion

    #region Serialization Functions (Binary)
    /// <summary>
    /// Writes the settings as: bool, int (enum value), int, bool.
    /// </summary>
    public void Serialize(BinaryWriter binWrt)
    {
        var iMsd = (int)eMsdConsider;
        binWrt.Write(bUseFromInRules);
        binWrt.Write(iMsd);
        binWrt.Write(iMaxRulesPerNode);
        binWrt.Write(bBuildFrontLemmatizer);
    }

    /// <summary>
    /// Reads the settings in the order written by <c>Serialize</c>.
    /// </summary>
    public void Deserialize(BinaryReader binRead)
    {
        bUseFromInRules = binRead.ReadBoolean();
        var iMsd = binRead.ReadInt32();
        eMsdConsider = (MsdConsideration)iMsd;
        iMaxRulesPerNode = binRead.ReadInt32();
        bBuildFrontLemmatizer = binRead.ReadBoolean();
    }
    #endregion

    #region Serialization Functions (Latino)
#if LATINO
    /// <summary>Writes the settings to a Latino binary serializer.</summary>
    public void Save(Latino.BinarySerializer binWrt) {
        int iMsd = (int)eMsdConsider;
        binWrt.WriteBool(bUseFromInRules);
        binWrt.WriteInt(iMsd);
        binWrt.WriteInt(iMaxRulesPerNode);
        binWrt.WriteBool(bBuildFrontLemmatizer);
    }

    /// <summary>Reads the settings from a Latino binary serializer.</summary>
    public void Load(Latino.BinarySerializer binRead) {
        bUseFromInRules = binRead.ReadBool();
        int iMsd = binRead.ReadInt();
        eMsdConsider = (MsdConsideration)iMsd;
        iMaxRulesPerNode = binRead.ReadInt();
        bBuildFrontLemmatizer = binRead.ReadBool();
    }

    /// <summary>Creates settings by loading them from a Latino binary serializer.</summary>
    public LemmatizerSettings(Latino.BinarySerializer reader) {
        Load(reader);
    }
#endif
    #endregion
}
}

@ -1,161 +0,0 @@
using System.Collections.Generic;
using System.IO;
namespace LemmaSharp
{
/// <summary>
/// Dictionary of lemmatization rules keyed by rule signature, with one rule marked as default.
/// </summary>
public class RuleList : Dictionary<string, LemmaRule>
{
    #region Private Variables
    private LemmatizerSettings lsett;
    private LemmaRule lrDefaultRule;
    #endregion

    #region Constructor(s)
    /// <summary>
    /// Creates a list that contains only the default rule (built from two empty strings, id 0).
    /// </summary>
    public RuleList(LemmatizerSettings lsett)
    {
        this.lsett = lsett;
        var lrEmpty = new LemmaRule("", "", 0, lsett);
        lrDefaultRule = AddRule(lrEmpty);
    }

    /// <summary>
    /// Creates a list by reading it from a binary reader.
    /// </summary>
    public RuleList(System.IO.BinaryReader binRead, LemmatizerSettings lsett)
    {
        Deserialize(binRead, lsett);
    }
    #endregion

    #region Public Properties
    /// <summary>The rule registered as default.</summary>
    public LemmaRule DefaultRule => lrDefaultRule;
    #endregion

    #region Essential Class Functions
    /// <summary>
    /// Returns the rule for the example's word/lemma pair, adding it when no rule with the
    /// same signature exists yet. A new rule gets the current item count as its id.
    /// </summary>
    public LemmaRule AddRule(LemmaExample le)
    {
        var lrCandidate = new LemmaRule(le.Word, le.Lemma, this.Count, lsett);
        return AddRule(lrCandidate);
    }

    /// <summary>
    /// Returns the stored rule with the same signature, or stores and returns the given rule.
    /// </summary>
    private LemmaRule AddRule(LemmaRule lrRuleNew)
    {
        LemmaRule lrExisting;
        if (TryGetValue(lrRuleNew.Signature, out lrExisting))
            return lrExisting;
        Add(lrRuleNew.Signature, lrRuleNew);
        return lrRuleNew;
    }
    #endregion

    #region Serialization Functions (Binary)
    /// <summary>
    /// Writes the list: top-object flag, [settings], item count, (key, rule)*, default rule signature.
    /// </summary>
    /// <param name="binWrt">Writer to write to.</param>
    /// <param name="bThisTopObject">When true, the settings are written as well.</param>
    public void Serialize(BinaryWriter binWrt, bool bThisTopObject)
    {
        //metadata
        binWrt.Write(bThisTopObject);
        //reference types, only for a top-level object
        if (bThisTopObject)
        {
            lsett.Serialize(binWrt);
        }
        //list items
        binWrt.Write(this.Count);
        foreach (var kvpRule in this)
        {
            binWrt.Write(kvpRule.Key);
            kvpRule.Value.Serialize(binWrt, false);
        }
        //the default rule is already among the items; only its signature is stored
        binWrt.Write(lrDefaultRule.Signature);
    }

    /// <summary>
    /// Replaces the content of the list with the data read from a binary reader.
    /// </summary>
    /// <param name="binRead">Reader positioned at the start of a serialized list.</param>
    /// <param name="lsett">Settings to use when the stream does not contain its own settings.</param>
    public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett)
    {
        //metadata
        var bTopObject = binRead.ReadBoolean();
        //reference types: read from the stream only for a top-level object
        this.lsett = bTopObject ? new LemmatizerSettings(binRead) : lsett;
        //list items
        Clear();
        var iItems = binRead.ReadInt32();
        for (var iItem = 0; iItem < iItems; iItem++)
        {
            var sSignature = binRead.ReadString();
            Add(sSignature, new LemmaRule(binRead, this.lsett));
        }
        //link the default rule; only its signature was stored
        lrDefaultRule = this[binRead.ReadString()];
    }
    #endregion

    #region Serialization Functions (Latino)
#if LATINO
    /// <summary>Writes the list to a Latino binary serializer.</summary>
    public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject) {
        //metadata
        binWrt.WriteBool(bThisTopObject);
        //reference types, only for a top-level object
        if (bThisTopObject) {
            lsett.Save(binWrt);
        }
        //list items
        binWrt.WriteInt(this.Count);
        foreach (var kvpRule in this) {
            binWrt.WriteString(kvpRule.Key);
            kvpRule.Value.Save(binWrt, false);
        }
        //the default rule is already among the items; only its signature is stored
        binWrt.WriteString(lrDefaultRule.Signature);
    }

    /// <summary>Replaces the content of the list with the data read from a Latino binary serializer.</summary>
    public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
        //metadata
        bool bTopObject = binRead.ReadBool();
        //reference types: read from the stream only for a top-level object
        this.lsett = bTopObject ? new LemmatizerSettings(binRead) : lsett;
        //list items
        Clear();
        int iItems = binRead.ReadInt();
        for (int iItem = 0; iItem < iItems; iItem++) {
            string sSignature = binRead.ReadString();
            Add(sSignature, new LemmaRule(binRead, this.lsett));
        }
        //link the default rule; only its signature was stored
        lrDefaultRule = this[binRead.ReadString()];
    }

    /// <summary>Creates a list by loading it from a Latino binary serializer.</summary>
    public RuleList(Latino.BinarySerializer binRead, LemmatizerSettings lsett) {
        Load(binRead, lsett);
    }
#endif
    #endregion
}
}

@ -1,50 +0,0 @@
using System;
namespace LemmaSharp
{
/// <summary>
/// A lemma rule paired with a weight; instances sort by weight, highest first.
/// </summary>
[Serializable]
class RuleWeighted : IComparable<RuleWeighted>
{
    #region Private Variables
    private LemmaRule lrRule;
    private double dWeight;
    #endregion

    #region Constructor(s)
    /// <summary>Pairs a rule with a weight.</summary>
    public RuleWeighted(LemmaRule lrRule, double dWeight)
    {
        this.dWeight = dWeight;
        this.lrRule = lrRule;
    }
    #endregion

    #region Public Properties
    /// <summary>The rule.</summary>
    public LemmaRule Rule => lrRule;

    /// <summary>The weight of the rule.</summary>
    public double Weight => dWeight;
    #endregion

    #region Essential Class Functions (comparing objects, eg.: for sorting)
    /// <summary>
    /// Orders by descending weight; equal (or incomparable) weights are ordered by descending rule id.
    /// </summary>
    /// <param name="rl">The object to compare with.</param>
    /// <returns>-1 when this instance sorts first, 1 when it sorts last, 0 when both rank the same.</returns>
    public int CompareTo(RuleWeighted rl)
    {
        //explicit comparisons are kept so that weights that compare neither way fall through to the ids
        if (dWeight < rl.dWeight)
            return 1;
        if (dWeight > rl.dWeight)
            return -1;
        if (lrRule.Id < rl.lrRule.Id)
            return 1;
        return lrRule.Id > rl.lrRule.Id ? -1 : 0;
    }
    #endregion

    #region Output & Serialization Functions
    /// <summary>
    /// The rule's text followed by the weight as a percentage in parentheses.
    /// </summary>
    public override string ToString()
    {
        var sText = string.Format("{0}{1:(0.00%)}", lrRule, dWeight);
        return sText;
    }
    #endregion
}
}

@ -1,9 +0,0 @@
using System.Runtime.Serialization;
namespace LemmaSharp
{
/// <summary>
/// A serializable object that maps a word to a lemma.
/// </summary>
public interface ILemmatizer : ISerializable
{
/// <summary>
/// Lemmatizes a single word.
/// </summary>
/// <param name="sWord">Word to lemmatize.</param>
/// <returns>The lemma produced for the word.</returns>
string Lemmatize(string sWord);
}
}

@ -1,8 +0,0 @@
namespace LemmaSharp
{
/// <summary>
/// A lemmatization model: it lemmatizes words and can describe itself as text.
/// </summary>
public interface ILemmatizerModel
{
/// <summary>
/// Lemmatizes a single word.
/// </summary>
/// <param name="sWord">Word to lemmatize.</param>
/// <returns>The lemma produced for the word.</returns>
string Lemmatize(string sWord);
/// <summary>
/// Returns a textual representation of the model.
/// </summary>
string ToString();
}
}

@ -1,12 +0,0 @@
namespace LemmaSharp
{
/// <summary>
/// A lemmatizer that collects examples and builds its model from them.
/// </summary>
public interface ITrainableLemmatizer : ILemmatizer
{
/// <summary>The examples collected so far.</summary>
ExampleList Examples { get; }
/// <summary>The lemmatization model.</summary>
ILemmatizerModel Model { get; }
/// <summary>Adds a word/lemma example.</summary>
void AddExample(string sWord, string sLemma);
/// <summary>Adds a word/lemma example with a weight.</summary>
void AddExample(string sWord, string sLemma, double dWeight);
/// <summary>Adds a word/lemma example with a weight and an msd tag.</summary>
void AddExample(string sWord, string sLemma, double dWeight, string sMsd);
/// <summary>Builds the model from the examples.</summary>
void BuildModel();
}
}

@ -1,539 +0,0 @@
/*==========================================================================;
*
* (c) 2004-08 JSI. All rights reserved.
*
* File: BinarySerializer.cs
* Version: 1.0
* Desc: Binary serializer
* Author: Miha Grcar
* Created on: Oct-2004
* Last modified: May-2008
* Revision: May-2008
*
***************************************************************************/
//Remark: Use this file as a Latino compatibility checker. When it is included in
// the project it defines the symbol LATINO, which should enable all Latino-specific
// serialization functions. When it is excluded, this code will not be created and the
// following Latino namespace will not be added to the project.
using System;
using System.Runtime.InteropServices;
using System.Collections.Generic;
using System.Reflection;
using System.Text;
using System.IO;
#if LATINO
namespace Latino
{
/* .-----------------------------------------------------------------------
|
| Class BinarySerializer
|
'-----------------------------------------------------------------------
*/
/// <summary>
/// Contract for types that <see cref="BinarySerializer"/> can write and read back.
/// </summary>
public interface ISerializable {
// *** Implementers must also provide a public constructor taking a single Latino.BinarySerializer:
// *** ReadValue/ReadObject look that constructor up by reflection to load an instance.
/// <summary>Writes this instance's state to <paramref name="writer"/>.</summary>
void Save(Latino.BinarySerializer writer);
}
public class BinarySerializer
{
// Assembly-qualified type name -> short tag written to the stream (filled by RegisterTypeName).
private static Dictionary<string, string> m_full_to_short_type_name
= new Dictionary<string, string>();
// Reverse lookup: short tag -> assembly-qualified type name.
private static Dictionary<string, string> m_short_to_full_type_name
= new Dictionary<string, string>();
// Stream that every Read*/Write* method operates on; assigned by the constructors.
private Stream m_stream;
// Backing field of the DataDir property; defaults to ".".
private string m_data_dir
= ".";
/// <summary>
/// Registers a two-way mapping between a full type name and its short tag.
/// </summary>
/// <param name="full_type_name">Assembly-qualified type name.</param>
/// <param name="short_type_name">Short tag stored in the stream instead of the full name.</param>
/// <remarks>Uses <c>Dictionary.Add</c>, so registering a duplicate key throws <see cref="ArgumentException"/>.</remarks>
private static void RegisterTypeName(string full_type_name, string short_type_name)
{
m_full_to_short_type_name.Add(full_type_name, short_type_name);
m_short_to_full_type_name.Add(short_type_name, full_type_name);
}
/// <summary>
/// Expands a short type tag to the registered full type name.
/// </summary>
/// <param name="short_type_name">Tag read from the stream.</param>
/// <returns>The registered full name, or the input unchanged when no mapping exists.</returns>
private static string GetFullTypeName(string short_type_name)
{
    string full_type_name;
    if (m_short_to_full_type_name.TryGetValue(short_type_name, out full_type_name))
    {
        return full_type_name;
    }
    return short_type_name;
}
/// <summary>
/// Abbreviates a full type name to its registered short tag.
/// </summary>
/// <param name="full_type_name">Assembly-qualified type name.</param>
/// <returns>The registered short tag, or the input unchanged when no mapping exists.</returns>
private static string GetShortTypeName(string full_type_name)
{
    string short_type_name;
    if (m_full_to_short_type_name.TryGetValue(full_type_name, out short_type_name))
    {
        return short_type_name;
    }
    return full_type_name;
}
/// <summary>
/// Registers the short tags used in place of the assembly-qualified names of the
/// built-in primitive types and <see cref="string"/>.
/// </summary>
static BinarySerializer()
{
    // Parallel tables: types[i] is stored in the stream as tags[i].
    Type[] types =
    {
        typeof(bool), typeof(byte), typeof(sbyte), typeof(char),
        typeof(double), typeof(float), typeof(int), typeof(uint),
        typeof(long), typeof(ulong), typeof(short), typeof(ushort),
        typeof(string)
    };
    string[] tags =
    {
        "b", "ui1", "i1", "c",
        "f8", "f4", "i4", "ui4",
        "i8", "ui8", "i2", "ui2",
        "s"
    };
    for (int i = 0; i < types.Length; i++)
    {
        RegisterTypeName(types[i].AssemblyQualifiedName, tags[i]);
    }
}
/// <summary>
/// Creates a serializer that reads from and writes to an existing stream.
/// </summary>
/// <param name="stream">The stream to operate on; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public BinarySerializer(Stream stream)
{
    // Fail here rather than with a NullReferenceException on the first read or write.
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }
    m_stream = stream;
}
/// <summary>Creates a serializer backed by a new, empty <see cref="MemoryStream"/>.</summary>
public BinarySerializer()
{
m_stream = new MemoryStream();
}
/// <summary>Creates a serializer backed by a <see cref="FileStream"/> opened on the given file.</summary>
/// <param name="file_name">Path passed to the <see cref="FileStream"/> constructor.</param>
/// <param name="file_mode">Mode passed to the <see cref="FileStream"/> constructor.</param>
public BinarySerializer(string file_name, FileMode file_mode)
{
m_stream = new FileStream(file_name, file_mode); // throws ArgumentException, NotSupportedException, ArgumentNullException, SecurityException, FileNotFoundException, IOException, DirectoryNotFoundException, PathTooLongException, ArgumentOutOfRangeException
}
// *** Reading ***
/// <summary>
/// Reads exactly <c>Marshal.SizeOf(typeof(T))</c> bytes from the stream.
/// </summary>
/// <typeparam name="T">Type whose marshalled size determines the byte count; callers in this class pass numeric primitives only.</typeparam>
/// <returns>A buffer holding the bytes read.</returns>
/// <exception cref="EndOfStreamException">The stream ended before all bytes were available.</exception>
/// <remarks>
/// Called directly or indirectly by most Read* methods, so exceptions thrown here
/// (IOException, NotSupportedException, ObjectDisposedException) surface from those too.
/// </remarks>
private byte[] Read<T>()
{
    int sz = Marshal.SizeOf(typeof(T));
    byte[] buffer = new byte[sz];
    int filled = 0;
    // Stream.Read may return fewer bytes than requested, so keep reading until the
    // buffer is full. A return of 0 means end of stream.
    while (filled < sz)
    {
        int got = m_stream.Read(buffer, filled, sz - filled); // throws IOException, NotSupportedException, ObjectDisposedException
        if (got <= 0)
        {
            throw new EndOfStreamException();
        }
        filled += got;
    }
    return buffer;
}
/// <summary>Reads one byte and returns true when it is non-zero.</summary>
public bool ReadBool()
{
return ReadByte() != 0;
}
/// <summary>Reads a single byte from the stream.</summary>
/// <returns>The byte read.</returns>
/// <exception cref="EndOfStreamException">The stream has no more data.</exception>
/// <remarks>
/// Called directly or indirectly by several other methods, so exceptions thrown here
/// (NotSupportedException, ObjectDisposedException) surface from those too.
/// </remarks>
public byte ReadByte()
{
    int val = m_stream.ReadByte(); // throws NotSupportedException, ObjectDisposedException
    // Stream.ReadByte signals end of stream with -1. Casting that to byte would
    // silently produce 255, so report it instead.
    if (val < 0)
    {
        throw new EndOfStreamException();
    }
    return (byte)val;
}
/// <summary>Reads one byte and casts it to <see cref="sbyte"/>.</summary>
public sbyte ReadSByte()
{
return (sbyte)ReadByte();
}
/// <summary>Reads one byte and casts it to <see cref="char"/>. Not called anywhere in this class.</summary>
private char ReadChar8()
{
return (char)ReadByte();
}
/// <summary>Reads two bytes (the size of a ushort) and decodes them as a <see cref="char"/> with BitConverter.</summary>
private char ReadChar16()
{
return BitConverter.ToChar(Read<ushort>(), 0);
}
/// <summary>Reads a character in the 16-bit form (see <see cref="ReadChar16"/>).</summary>
public char ReadChar()
{
return ReadChar16();
}
/// <summary>Reads the bytes of a <see cref="double"/> and decodes them with BitConverter.</summary>
public double ReadDouble()
{
return BitConverter.ToDouble(Read<double>(), 0);
}
/// <summary>Reads the bytes of a <see cref="float"/> and decodes them with BitConverter.</summary>
public float ReadFloat()
{
return BitConverter.ToSingle(Read<float>(), 0);
}
/// <summary>Reads the bytes of an <see cref="int"/> and decodes them with BitConverter.</summary>
public int ReadInt()
{
return BitConverter.ToInt32(Read<int>(), 0);
}
/// <summary>Reads the bytes of a <see cref="uint"/> and decodes them with BitConverter.</summary>
public uint ReadUInt()
{
return BitConverter.ToUInt32(Read<uint>(), 0);
}
/// <summary>Reads the bytes of a <see cref="long"/> and decodes them with BitConverter.</summary>
public long ReadLong()
{
return BitConverter.ToInt64(Read<long>(), 0);
}
/// <summary>Reads the bytes of a <see cref="ulong"/> and decodes them with BitConverter.</summary>
public ulong ReadULong()
{
return BitConverter.ToUInt64(Read<ulong>(), 0);
}
/// <summary>Reads the bytes of a <see cref="short"/> and decodes them with BitConverter.</summary>
public short ReadShort()
{
return BitConverter.ToInt16(Read<short>(), 0);
}
/// <summary>Reads the bytes of a <see cref="ushort"/> and decodes them with BitConverter.</summary>
public ushort ReadUShort()
{
return BitConverter.ToUInt16(Read<ushort>(), 0);
}
/// <summary>
/// Reads a length-prefixed ASCII string: an int character count followed by one byte per character.
/// </summary>
/// <returns>The decoded string, or null when the stored length is negative (the writer stores -1 for null).</returns>
/// <exception cref="EndOfStreamException">The stream ended before the announced number of bytes was read.</exception>
private string ReadString8()
{
    int len = ReadInt();
    if (len < 0) { return null; }
    byte[] buffer = new byte[len];
    int filled = 0;
    // Stream.Read may deliver the payload in several pieces; loop until it is complete
    // so that a short read does not yield a silently truncated string.
    while (filled < len)
    {
        int got = m_stream.Read(buffer, filled, len - filled); // throws IOException, NotSupportedException, ObjectDisposedException
        if (got <= 0)
        {
            throw new EndOfStreamException();
        }
        filled += got;
    }
    return Encoding.ASCII.GetString(buffer);
}
/// <summary>
/// Reads a length-prefixed UTF-16 string: an int character count followed by two bytes per character.
/// </summary>
/// <returns>The decoded string, or null when the stored length is negative (the writer stores -1 for null).</returns>
/// <exception cref="EndOfStreamException">The stream ended before the announced number of bytes was read.</exception>
private string ReadString16()
{
    int len = ReadInt();
    if (len < 0) { return null; }
    int byte_count = len * 2;
    byte[] buffer = new byte[byte_count];
    int filled = 0;
    // Stream.Read may deliver the payload in several pieces; loop until it is complete
    // so that a short read does not yield a silently truncated string.
    while (filled < byte_count)
    {
        int got = m_stream.Read(buffer, filled, byte_count - filled); // throws IOException, NotSupportedException, ObjectDisposedException
        if (got <= 0)
        {
            throw new EndOfStreamException();
        }
        filled += got;
    }
    return Encoding.Unicode.GetString(buffer);
}
/// <summary>Reads a string in the 16-bit form (see <see cref="ReadString16"/>); may return null.</summary>
public string ReadString()
{
return ReadString16(); // throws exceptions (see ReadString16())
}
/// <summary>
/// Reads a type reference: an 8-bit string holding either a registered short tag or a full type name.
/// </summary>
/// <returns>The resolved type, or null when <see cref="Type.GetType(string)"/> cannot find it.</returns>
/// <exception cref="InvalidDataException">The stream stores a null type name.</exception>
public Type ReadType()
{
    string type_name = ReadString8(); // throws exceptions (see ReadString8())
    // A null name is malformed data. Report it as such instead of letting the
    // dictionary lookup in GetFullTypeName fail with an ArgumentNullException.
    if (type_name == null)
    {
        throw new InvalidDataException("Type name is missing from the stream.");
    }
    // NOTE(review): the name comes straight from the stream and is resolved without
    // any allow-list; do not feed untrusted data to this method.
    return Type.GetType(GetFullTypeName(type_name)); // throws TargetInvocationException, ArgumentException, TypeLoadException, FileNotFoundException, FileLoadException, BadImageFormatException
}
/// <summary>
/// Reads one value of the given type from the stream.
/// </summary>
/// <param name="type">
/// A built-in primitive type, or a type implementing <see cref="Latino.ISerializable"/> that has a
/// public constructor taking a <see cref="Latino.BinarySerializer"/>.
/// </param>
/// <returns>The value read, boxed as <see cref="ValueType"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="type"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="type"/> is not supported, or it lacks the required loading constructor.
/// </exception>
public ValueType ReadValue(Type type)
{
    if (type == null)
    {
        throw new ArgumentNullException("type");
    }
    if (type == typeof(bool)) { return ReadBool(); }
    if (type == typeof(byte)) { return ReadByte(); }
    if (type == typeof(sbyte)) { return ReadSByte(); }
    if (type == typeof(char)) { return ReadChar(); }
    if (type == typeof(double)) { return ReadDouble(); }
    if (type == typeof(float)) { return ReadFloat(); }
    if (type == typeof(int)) { return ReadInt(); }
    if (type == typeof(uint)) { return ReadUInt(); }
    if (type == typeof(long)) { return ReadLong(); }
    if (type == typeof(ulong)) { return ReadULong(); }
    if (type == typeof(short)) { return ReadShort(); }
    if (type == typeof(ushort)) { return ReadUShort(); }
    if (typeof(Latino.ISerializable).IsAssignableFrom(type))
    {
        // Custom types are loaded through their (BinarySerializer) constructor.
        ConstructorInfo cxtor = type.GetConstructor(new Type[] { typeof(Latino.BinarySerializer) });
        if (cxtor == null)
        {
            // Previously this surfaced as a NullReferenceException on Invoke.
            throw new ArgumentException("Type " + type.FullName + " has no public constructor taking a Latino.BinarySerializer.", "type");
        }
        return (ValueType)cxtor.Invoke(new object[] { this }); // throws MemberAccessException, MethodAccessException, TargetInvocationException, NotSupportedException, SecurityException
    }
    // Specific exception type instead of a bare Exception; still caught by catch (Exception).
    throw new ArgumentException("Type " + type.FullName + " is not supported by ReadValue.", "type");
}
/// <summary>Generic convenience over <c>ReadValue(typeof(T))</c>; unboxes the result to <typeparamref name="T"/>.</summary>
public T ReadValue<T>()
{
return (T)(object)ReadValue(typeof(T)); // throws exceptions (see ReadValue(Type type))
}
/// <summary>
/// Reads an object written by <c>WriteObject</c>.
/// </summary>
/// <param name="type">The declared type; used when the stream does not carry an explicit type.</param>
/// <returns>The object read, or null when a null reference was stored.</returns>
/// <remarks>
/// The payload is preceded by a one-byte tag: 0 = null reference, 1 = object of exactly the
/// declared type, 2 = explicit type reference follows (see <c>ReadType</c>).
/// </remarks>
/// <exception cref="InvalidDataException">The tag byte is not 0, 1 or 2.</exception>
/// <exception cref="TypeLoadException">The type named in the stream cannot be resolved.</exception>
/// <exception cref="ArgumentNullException">No type is available to decode the payload.</exception>
/// <exception cref="ArgumentException">The effective type is not supported or lacks the loading constructor.</exception>
public object ReadObject(Type type)
{
    switch (ReadByte())
    {
        case 0:
            return null;
        case 1:
            break;
        case 2:
            Type type_0 = ReadType(); // throws exceptions (see ReadType())
            if (type_0 == null)
            {
                // Type.GetType returned null; previously this led to a NullReferenceException below.
                throw new TypeLoadException("The type named in the stream could not be resolved.");
            }
            // NOTE(review): type_0 is taken from the stream without checking that it is
            // assignable to the requested type, and its constructor is invoked below.
            // Confirm streams are trusted, or add a type.IsAssignableFrom(type_0) check.
            type = type_0;
            break;
        default:
            throw new InvalidDataException();
    }
    if (type == null)
    {
        throw new ArgumentNullException("type");
    }
    if (type == typeof(string))
    {
        return ReadString();
    }
    if (typeof(Latino.ISerializable).IsAssignableFrom(type))
    {
        // Custom types are loaded through their (BinarySerializer) constructor.
        ConstructorInfo cxtor = type.GetConstructor(new Type[] { typeof(Latino.BinarySerializer) });
        if (cxtor == null)
        {
            // Previously this surfaced as a NullReferenceException on Invoke.
            throw new ArgumentException("Type " + type.FullName + " has no public constructor taking a Latino.BinarySerializer.", "type");
        }
        return cxtor.Invoke(new object[] { this }); // throws MemberAccessException, MethodAccessException, TargetInvocationException, NotSupportedException, SecurityException
    }
    if (type.IsValueType)
    {
        return ReadValue(type); // throws exceptions (see ReadValue(Type type))
    }
    // Specific exception type instead of a bare Exception; still caught by catch (Exception).
    throw new ArgumentException("Type " + type.FullName + " is not supported by ReadObject.", "type");
}
/// <summary>Generic convenience over <c>ReadObject(typeof(T))</c>; casts the result to <typeparamref name="T"/>.</summary>
public T ReadObject<T>()
{
return (T)ReadObject(typeof(T)); // throws exceptions (see ReadObject(Type type))
}
/// <summary>
/// Reads either a raw value or a tagged object, depending on whether
/// <paramref name="type"/> is a value type.
/// </summary>
/// <param name="type">The declared type of the item to read.</param>
/// <returns>The result of <c>ReadValue</c> for value types, otherwise the result of <c>ReadObject</c>.</returns>
public object ReadValueOrObject(Type type)
{
    if (!type.IsValueType)
    {
        return ReadObject(type); // throws exceptions (see ReadObject(Type type))
    }
    return ReadValue(type); // throws exceptions (see ReadValue(Type type))
}
/// <summary>Generic convenience over <c>ReadValueOrObject(typeof(T))</c>; casts the result to <typeparamref name="T"/>.</summary>
public T ReadValueOrObject<T>()
{
return (T)ReadValueOrObject(typeof(T)); // throws exceptions (see ReadValueOrObject(Type type))
}
// *** Writing ***
/// <summary>Writes the whole byte array to the stream.</summary>
/// <param name="data">Bytes to write; must not be null because its Length is read.</param>
private void Write(byte[] data) // Write(byte[] data) is directly or indirectly called from several methods thus exceptions thrown here can also be thrown in all those methods
{
m_stream.Write(data, 0, data.Length); // throws IOException, NotSupportedException, ObjectDisposedException
}
/// <summary>Writes a boolean as a single byte: 1 for true, 0 for false.</summary>
public void WriteBool(bool val)
{
WriteByte(val ? (byte)1 : (byte)0);
}
/// <summary>Writes a single byte to the stream.</summary>
public void WriteByte(byte val) // WriteByte(byte val) is directly or indirectly called from several methods thus exceptions thrown here can also be thrown in all those methods
{
m_stream.WriteByte(val); // throws IOException, NotSupportedException, ObjectDisposedException
}
/// <summary>Casts the value to <see cref="byte"/> and writes it as a single byte.</summary>
public void WriteSByte(sbyte val)
{
WriteByte((byte)val);
}
/// <summary>
/// Writes a character as one ASCII-encoded byte. Not called anywhere in this class.
/// </summary>
private void WriteChar8(char val)
{
WriteByte(Encoding.ASCII.GetBytes(new char[] { val })[0]);
}
/// <summary>Writes a character as the two bytes of its <see cref="ushort"/> code.</summary>
private void WriteChar16(char val)
{
Write(BitConverter.GetBytes((ushort)val));
}
/// <summary>Writes a character in the 16-bit form (see <see cref="WriteChar16"/>).</summary>
public void WriteChar(char val)
{
WriteChar16(val);
}
/// <summary>Writes the bytes produced by <c>BitConverter.GetBytes(double)</c>.</summary>
public void WriteDouble(double val)
{
Write(BitConverter.GetBytes(val));
}
/// <summary>Writes the bytes produced by <c>BitConverter.GetBytes(float)</c>.</summary>
public void WriteFloat(float val)
{
Write(BitConverter.GetBytes(val));
}
/// <summary>Writes the bytes produced by <c>BitConverter.GetBytes(int)</c>.</summary>
public void WriteInt(int val)
{
Write(BitConverter.GetBytes(val));
}
/// <summary>Writes the bytes produced by <c>BitConverter.GetBytes(uint)</c>.</summary>
public void WriteUInt(uint val)
{
Write(BitConverter.GetBytes(val));
}
/// <summary>Writes the bytes produced by <c>BitConverter.GetBytes(long)</c>.</summary>
public void WriteLong(long val)
{
Write(BitConverter.GetBytes(val));
}
/// <summary>Writes the bytes produced by <c>BitConverter.GetBytes(ulong)</c>.</summary>
public void WriteULong(ulong val)
{
Write(BitConverter.GetBytes(val));
}
/// <summary>Writes the bytes produced by <c>BitConverter.GetBytes(short)</c>.</summary>
public void WriteShort(short val)
{
Write(BitConverter.GetBytes(val));
}
/// <summary>Writes the bytes produced by <c>BitConverter.GetBytes(ushort)</c>.</summary>
public void WriteUShort(ushort val)
{
Write(BitConverter.GetBytes(val));
}
/// <summary>
/// Writes a length-prefixed ASCII string: the character count as an int, then the ASCII bytes.
/// A null string is stored as the length -1 with no payload.
/// </summary>
/// <param name="val">The string to write; may be null.</param>
private void WriteString8(string val)
{
    if (val != null)
    {
        WriteInt(val.Length);
        byte[] payload = Encoding.ASCII.GetBytes(val);
        Write(payload);
    }
    else
    {
        WriteInt(-1);
    }
}
/// <summary>
/// Writes a length-prefixed UTF-16 string: the character count as an int, then the
/// <c>Encoding.Unicode</c> bytes. A null string is stored as the length -1 with no payload.
/// </summary>
/// <param name="val">The string to write; may be null.</param>
private void WriteString16(string val)
{
    if (val != null)
    {
        WriteInt(val.Length);
        byte[] payload = Encoding.Unicode.GetBytes(val);
        Write(payload);
    }
    else
    {
        WriteInt(-1);
    }
}
/// <summary>Writes a string in the 16-bit form (see <see cref="WriteString16"/>); null is allowed.</summary>
public void WriteString(string val)
{
WriteString16(val);
}
/// <summary>
/// Writes one value to the stream, dispatching on its runtime type.
/// </summary>
/// <param name="val">
/// A boxed built-in primitive, or a value implementing <see cref="Latino.ISerializable"/>.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="val"/> is null.</exception>
/// <exception cref="ArgumentException">The runtime type of <paramref name="val"/> is not supported.</exception>
/// <remarks>
/// Unsupported or null values used to be dropped silently, leaving a stream that no
/// longer matches what the reader expects. They are now rejected up front.
/// </remarks>
public void WriteValue(ValueType val)
{
    if (val == null)
    {
        throw new ArgumentNullException("val");
    }
    if (val is bool)
    {
        WriteBool((bool)val);
    }
    else if (val is byte)
    {
        WriteByte((byte)val);
    }
    else if (val is sbyte)
    {
        WriteSByte((sbyte)val);
    }
    else if (val is char)
    {
        WriteChar((char)val);
    }
    else if (val is double)
    {
        WriteDouble((double)val);
    }
    else if (val is float)
    {
        WriteFloat((float)val);
    }
    else if (val is int)
    {
        WriteInt((int)val);
    }
    else if (val is uint)
    {
        WriteUInt((uint)val);
    }
    else if (val is long)
    {
        WriteLong((long)val);
    }
    else if (val is ulong)
    {
        WriteULong((ulong)val);
    }
    else if (val is short)
    {
        WriteShort((short)val);
    }
    else if (val is ushort)
    {
        WriteUShort((ushort)val);
    }
    else if (val is Latino.ISerializable)
    {
        ((Latino.ISerializable)val).Save(this); // throws serialization-related exceptions
    }
    else
    {
        throw new ArgumentException("Values of type " + val.GetType().FullName + " cannot be serialized.", "val");
    }
}
/// <summary>
/// Writes an object preceded by a one-byte tag describing how to read it back.
/// </summary>
/// <param name="type">The declared type; when the object's runtime type differs, the runtime type is written too.</param>
/// <param name="obj">
/// The object to write: null, a string, a <see cref="Latino.ISerializable"/> instance or a boxed value.
/// </param>
/// <remarks>
/// Tag values: 0 = null reference, 1 = runtime type equals <paramref name="type"/>,
/// 2 = a type reference (see <c>WriteType</c>) follows before the payload.
/// </remarks>
/// <exception cref="ArgumentException"><paramref name="obj"/> is of a kind that cannot be serialized.</exception>
public void WriteObject(Type type, object obj)
{
    if (obj == null)
    {
        WriteByte(0);
        return;
    }

    // Validate before emitting anything: an unsupported object used to leave a tag
    // (and possibly a type name) in the stream with no payload behind it.
    if (!(obj is string) && !(obj is Latino.ISerializable) && !(obj is ValueType))
    {
        throw new ArgumentException("Objects of type " + obj.GetType().FullName + " cannot be serialized.", "obj");
    }

    Type obj_type = obj.GetType();
    if (obj_type == type)
    {
        WriteByte(1);
    }
    else
    {
        WriteByte(2);
        WriteType(obj_type);
    }

    if (obj is string)
    {
        WriteString((string)obj);
    }
    else if (obj is Latino.ISerializable)
    {
        ((Latino.ISerializable)obj).Save(this); // throws serialization-related exceptions
    }
    else
    {
        WriteValue((ValueType)obj); // throws exceptions (see WriteValue(ValueType val))
    }
}
/// <summary>Generic convenience over <c>WriteObject(typeof(T), obj)</c>.</summary>
public void WriteObject<T>(T obj)
{
WriteObject(typeof(T), obj); // throws exceptions (see WriteObject(Type type, object obj))
}
/// <summary>
/// Writes either a raw value or a tagged object, depending on whether
/// <paramref name="type"/> is a value type.
/// </summary>
/// <param name="type">The declared type of <paramref name="obj"/>.</param>
/// <param name="obj">The item to write.</param>
public void WriteValueOrObject(Type type, object obj)
{
    if (!type.IsValueType)
    {
        WriteObject(type, obj); // throws exceptions (see WriteObject(Type type, object obj))
        return;
    }
    WriteValue((ValueType)obj); // throws exceptions (see WriteValue(ValueType val))
}
/// <summary>Generic convenience over <c>WriteValueOrObject(typeof(T), obj)</c>.</summary>
public void WriteValueOrObject<T>(T obj)
{
WriteValueOrObject(typeof(T), obj); // throws exceptions (see WriteValueOrObject(Type type, object obj))
}
/// <summary>
/// Writes a type reference as an 8-bit string: the registered short tag when one exists,
/// otherwise the assembly-qualified name.
/// </summary>
/// <param name="type">The type to record; must not be null because its AssemblyQualifiedName is read.</param>
public void WriteType(Type type)
{
//Utils.ThrowException(type == null ? new ArgumentNullException("type") : null);
WriteString8(GetShortTypeName(type.AssemblyQualifiedName));
}
// *** Data directory ***
/// <summary>
/// A directory path associated with this serializer (default "."). The value is only
/// stored and returned; nothing else in this class reads it.
/// </summary>
public string DataDir
{
get { return m_data_dir; }
set
{
// The path validation below is disabled, so any value (including null) is accepted.
//Utils.ThrowException(!Utils.VerifyPathName(value, /*must_exist=*/true) ? new InvalidArgumentValueException("DataDir") : null);
m_data_dir = value;
}
}
// *** Access to the associated stream ***
/// <summary>Closes the underlying stream.</summary>
public void Close()
{
m_stream.Close();
}
/// <summary>Flushes the underlying stream.</summary>
public void Flush()
{
m_stream.Flush(); // throws IOException
}
/// <summary>The underlying stream this serializer reads from and writes to.</summary>
public Stream Stream
{
get { return m_stream; }
}
}
}
#endif

@ -1,165 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProductVersion>9.0.21022</ProductVersion>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{A39293C1-92D8-47B9-93A4-41F443B4F9E4}</ProjectGuid>
<OutputType>Library</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>LemmaSharp</RootNamespace>
<AssemblyName>LemmaSharp</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<IsWebBootstrapper>true</IsWebBootstrapper>
<StartupObject>
</StartupObject>
<FileUpgradeFlags>
</FileUpgradeFlags>
<UpgradeBackupLocation>
</UpgradeBackupLocation>
<OldToolsVersion>3.5</OldToolsVersion>
<TargetFrameworkProfile />
<PublishUrl>http://localhost/LemmaSharp/</PublishUrl>
<Install>true</Install>
<InstallFrom>Web</InstallFrom>
<UpdateEnabled>true</UpdateEnabled>
<UpdateMode>Foreground</UpdateMode>
<UpdateInterval>7</UpdateInterval>
<UpdateIntervalUnits>Days</UpdateIntervalUnits>
<UpdatePeriodically>false</UpdatePeriodically>
<UpdateRequired>false</UpdateRequired>
<MapFileExtensions>true</MapFileExtensions>
<ApplicationRevision>0</ApplicationRevision>
<ApplicationVersion>1.0.0.%2a</ApplicationVersion>
<UseApplicationTrust>false</UseApplicationTrust>
<BootstrapperEnabled>true</BootstrapperEnabled>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>TRACE;DEBUG;NOLATINO</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE;NOLATINO</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x86\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x86</PlatformTarget>
<CodeAnalysisUseTypeNameInSuppression>true</CodeAnalysisUseTypeNameInSuppression>
<CodeAnalysisModuleSuppressionsFile>GlobalSuppressions.cs</CodeAnalysisModuleSuppressionsFile>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
<OutputPath>bin\x86\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x86</PlatformTarget>
<CodeAnalysisUseTypeNameInSuppression>true</CodeAnalysisUseTypeNameInSuppression>
<CodeAnalysisModuleSuppressionsFile>GlobalSuppressions.cs</CodeAnalysisModuleSuppressionsFile>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x64\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x64</PlatformTarget>
<CodeAnalysisUseTypeNameInSuppression>true</CodeAnalysisUseTypeNameInSuppression>
<CodeAnalysisModuleSuppressionsFile>GlobalSuppressions.cs</CodeAnalysisModuleSuppressionsFile>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
<OutputPath>bin\x64\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x64</PlatformTarget>
<CodeAnalysisUseTypeNameInSuppression>true</CodeAnalysisUseTypeNameInSuppression>
<CodeAnalysisModuleSuppressionsFile>GlobalSuppressions.cs</CodeAnalysisModuleSuppressionsFile>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<ItemGroup>
<Reference Include="Lzma#, Version=4.12.3884.11200, Culture=neutral, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>ExternalLibs\Lzma#.dll</HintPath>
</Reference>
<Reference Include="System" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="LatinoCompatibility\BinarySerializer.cs">
<SubType>Code</SubType>
</Compile>
<Compile Include="Interfaces\ILemmatizer.cs" />
<Compile Include="Interfaces\ILemmatizerModel.cs" />
<Compile Include="Interfaces\ILemmatizerTrainable.cs" />
<Compile Include="Classes\LemmatizerSettings.cs" />
<Compile Include="Classes\LemmaRule.cs" />
<Compile Include="Classes\Lemmatizer.cs" />
<Compile Include="Classes\LemmaTreeNode.cs" />
<Compile Include="Classes\LemmaExample.cs" />
<Compile Include="Classes\ExampleList.cs" />
<Compile Include="Classes\RuleList.cs" />
<Compile Include="Classes\RuleWeighted.cs" />
</ItemGroup>
<ItemGroup>
<BootstrapperPackage Include="Microsoft.Net.Client.3.5">
<Visible>False</Visible>
<ProductName>.NET Framework Client Profile</ProductName>
<Install>false</Install>
</BootstrapperPackage>
<BootstrapperPackage Include="Microsoft.Net.Framework.2.0">
<Visible>False</Visible>
<ProductName>.NET Framework 2.0 %28x86%29</ProductName>
<Install>true</Install>
</BootstrapperPackage>
<BootstrapperPackage Include="Microsoft.Net.Framework.3.0">
<Visible>False</Visible>
<ProductName>.NET Framework 3.0 %28x86%29</ProductName>
<Install>false</Install>
</BootstrapperPackage>
<BootstrapperPackage Include="Microsoft.Net.Framework.3.5">
<Visible>False</Visible>
<ProductName>.NET Framework 3.5</ProductName>
<Install>false</Install>
</BootstrapperPackage>
<BootstrapperPackage Include="Microsoft.Net.Framework.3.5.SP1">
<Visible>False</Visible>
<ProductName>.NET Framework 3.5 SP1</ProductName>
<Install>false</Install>
</BootstrapperPackage>
</ItemGroup>
<ItemGroup>
<Folder Include="Properties\" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>

@ -1,28 +0,0 @@
namespace LemmaSharp
{
/// <summary>
/// Languages for which a prebuilt lemmatizer is available.
/// </summary>
/// <remarks>
/// Do not reorder or insert members: LemmatizerPrebuilt uses the numeric value of a member
/// (times two) as an index into its language/lexicon table.
/// </remarks>
public enum LanguagePrebuilt
{
//from Multext-East v4 lexicons
Bulgarian,
Czech,
English,
Estonian,
Persian,
French,
Hungarian,
Macedonian,
Polish,
Romanian,
Russian,
Slovak,
Slovene,
Serbian,
Ukrainian,
//from Multext lexicons
EnglishMT,
FrenchMT,
German,
Italian,
Spanish,
}
}

@ -1,118 +0,0 @@
using System;
using System.IO;
using System.Reflection;
using System.Runtime.Serialization;
namespace LemmaSharp
{
/// <summary>
/// Base class for lemmatizers tied to one <see cref="LanguagePrebuilt"/> value; it derives
/// per-language resource file names and looks up manifest resources in the assembly
/// supplied by the subclass.
/// </summary>
[Serializable]
public abstract class LemmatizerPrebuilt : Lemmatizer
{
    #region Private Variables

    // Flat table of (language code, lexicon name) pairs. The pair for a language starts at
    // index 2 * (int)LanguagePrebuilt, so the order must match the enum declaration order.
    // The table is never modified, hence readonly.
    private static readonly string[] asLangMapping = new string[] {
        "bg", "mlteast",
        "cs", "mlteast",
        "en", "mlteast",
        "et", "mlteast",
        "fa", "mlteast",
        "fr", "mlteast",
        "hu", "mlteast",
        "mk", "mlteast",
        "pl", "mlteast",
        "ro", "mlteast",
        "ru", "mlteast",
        "sk", "mlteast",
        "sl", "mlteast",
        "sr", "mlteast",
        "uk", "mlteast",
        "en", "multext",
        "fr", "multext",
        "ge", "multext",
        "it", "multext",
        "sp", "multext",
    };

    private LanguagePrebuilt lang;

    #endregion

    #region Constructor(s)

    /// <summary>Creates a prebuilt lemmatizer for <paramref name="lang"/> using the base class defaults.</summary>
    public LemmatizerPrebuilt(LanguagePrebuilt lang)
        : base()
    {
        this.lang = lang;
    }

    /// <summary>Creates a prebuilt lemmatizer for <paramref name="lang"/> with explicit settings.</summary>
    public LemmatizerPrebuilt(LanguagePrebuilt lang, LemmatizerSettings lsett)
        : base(lsett)
    {
        this.lang = lang;
    }

    #endregion

    #region Private Properties Helping Functions

    /// <summary>Builds the resource file name for this instance's language.</summary>
    /// <param name="sFileMask">Format string whose {0} placeholder receives "lexicon-language".</param>
    protected string GetResourceFileName(string sFileMask)
    {
        return GetResourceFileName(sFileMask, lang);
    }

    /// <summary>Builds the resource file name for the given language.</summary>
    /// <param name="sFileMask">Format string whose {0} placeholder receives "lexicon-language", e.g. "mlteast-en".</param>
    /// <param name="lang">Language whose table entry is used.</param>
    public static string GetResourceFileName(string sFileMask, LanguagePrebuilt lang)
    {
        string langFileName = asLangMapping[(int)lang * 2 + 1] + '-' + asLangMapping[(int)lang * 2];
        return string.Format(sFileMask, langFileName);
    }

    #endregion

    #region Public Properties

    /// <summary>The language this lemmatizer was created for.</summary>
    public LanguagePrebuilt Language
    {
        get
        {
            return lang;
        }
    }

    /// <summary>The lexicon the language's model comes from.</summary>
    public LexiconPrebuilt Lexicon
    {
        get
        {
            return GetLexicon(lang);
        }
    }

    #endregion

    #region Public Properties

    /// <summary>
    /// Maps a language to its lexicon by parsing the lexicon name from the table
    /// case-insensitively into <see cref="LexiconPrebuilt"/>.
    /// </summary>
    public static LexiconPrebuilt GetLexicon(LanguagePrebuilt lang)
    {
        return (LexiconPrebuilt)Enum.Parse(typeof(LexiconPrebuilt), asLangMapping[((int)lang) * 2 + 1], true);
    }

    #endregion

    #region Resource Management Functions

    /// <summary>Returns the assembly whose manifest resources are searched.</summary>
    protected abstract Assembly GetExecutingAssembly();

    /// <summary>
    /// Opens the first manifest resource whose name ends with <paramref name="sResourceShortName"/>.
    /// </summary>
    /// <param name="sResourceShortName">Trailing part of the resource name, typically the file name.</param>
    /// <returns>The resource stream, or null when no resource name matches.</returns>
    protected Stream GetResourceStream(string sResourceShortName)
    {
        var assembly = GetExecutingAssembly();
        foreach (var sResource in assembly.GetManifestResourceNames())
        {
            // Resource names are identifiers, not linguistic text: compare ordinally so
            // the match cannot vary with the current culture.
            if (sResource.EndsWith(sResourceShortName, StringComparison.Ordinal))
            {
                return assembly.GetManifestResourceStream(sResource);
            }
        }
        return null;
    }

    #endregion

    #region Serialization Functions

    /// <summary>Runtime-serialization constructor; forwards to the base class.</summary>
    public LemmatizerPrebuilt(SerializationInfo info, StreamingContext context)
        : base(info, context)
    {
    }

    #endregion
}
}

@ -1,29 +0,0 @@
using System;
using System.IO;
using System.Reflection;
namespace LemmaSharp
{
/// <summary>
/// Prebuilt lemmatizer that loads its model from a "compact7z-*.lem" resource embedded
/// in this assembly.
/// </summary>
[Serializable]
public class LemmatizerPrebuiltCompact : LemmatizerPrebuilt
{
    /// <summary>Resource file name pattern; {0} is replaced by "lexicon-language".</summary>
    public const string FILEMASK = "compact7z-{0}.lem";

    #region Constructor(s) & Destructor(s)

    /// <summary>
    /// Creates the lemmatizer for <paramref name="lang"/> and deserializes its model from the
    /// matching embedded resource.
    /// </summary>
    /// <param name="lang">Language whose model should be loaded.</param>
    /// <exception cref="FileNotFoundException">No embedded resource matches the language.</exception>
    public LemmatizerPrebuiltCompact(LanguagePrebuilt lang)
        : base(lang)
    {
        string sResourceFile = GetResourceFileName(FILEMASK);
        // The using block closes the stream even when Deserialize throws.
        using (Stream stream = GetResourceStream(sResourceFile))
        {
            // GetResourceStream returns null when nothing matches; say which resource is
            // missing instead of failing with a NullReferenceException.
            if (stream == null)
            {
                throw new FileNotFoundException("Embedded lemmatizer model not found: " + sResourceFile, sResourceFile);
            }
            this.Deserialize(stream);
        }
    }

    #endregion

    #region Resource Management Functions

    /// <summary>Returns this assembly, i.e. the one that embeds the compact model files.</summary>
    protected override Assembly GetExecutingAssembly()
    {
        return Assembly.GetExecutingAssembly();
    }

    #endregion
}
}

@ -1,8 +0,0 @@
namespace LemmaSharp
{
/// <summary>
/// Lexicon families that the prebuilt models come from.
/// </summary>
/// <remarks>
/// LemmatizerPrebuilt.GetLexicon parses the lexicon names "mlteast" and "multext"
/// case-insensitively into these members, so the member names must keep matching those strings.
/// </remarks>
public enum LexiconPrebuilt
{
MltEast,
Multext
}
}

@ -1,132 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProductVersion>9.0.21022</ProductVersion>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{1E700D21-62D3-4525-93FE-C1FB0A1B0564}</ProjectGuid>
<OutputType>Library</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>LemmaSharp</RootNamespace>
<AssemblyName>LemmaSharpPrebuilt</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<FileUpgradeFlags>
</FileUpgradeFlags>
<UpgradeBackupLocation>
</UpgradeBackupLocation>
<OldToolsVersion>3.5</OldToolsVersion>
<PublishUrl>publish\</PublishUrl>
<Install>true</Install>
<InstallFrom>Disk</InstallFrom>
<UpdateEnabled>false</UpdateEnabled>
<UpdateMode>Foreground</UpdateMode>
<UpdateInterval>7</UpdateInterval>
<UpdateIntervalUnits>Days</UpdateIntervalUnits>
<UpdatePeriodically>false</UpdatePeriodically>
<UpdateRequired>false</UpdateRequired>
<MapFileExtensions>true</MapFileExtensions>
<ApplicationRevision>0</ApplicationRevision>
<ApplicationVersion>1.0.0.%2a</ApplicationVersion>
<IsWebBootstrapper>false</IsWebBootstrapper>
<UseApplicationTrust>false</UseApplicationTrust>
<BootstrapperEnabled>true</BootstrapperEnabled>
<TargetFrameworkProfile />
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x86\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x86</PlatformTarget>
<CodeAnalysisUseTypeNameInSuppression>true</CodeAnalysisUseTypeNameInSuppression>
<CodeAnalysisModuleSuppressionsFile>GlobalSuppressions.cs</CodeAnalysisModuleSuppressionsFile>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
<OutputPath>bin\x86\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x86</PlatformTarget>
<CodeAnalysisUseTypeNameInSuppression>true</CodeAnalysisUseTypeNameInSuppression>
<CodeAnalysisModuleSuppressionsFile>GlobalSuppressions.cs</CodeAnalysisModuleSuppressionsFile>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
<DebugSymbols>true</DebugSymbols>
<OutputPath>bin\x64\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DebugType>full</DebugType>
<PlatformTarget>x64</PlatformTarget>
<CodeAnalysisUseTypeNameInSuppression>true</CodeAnalysisUseTypeNameInSuppression>
<CodeAnalysisModuleSuppressionsFile>GlobalSuppressions.cs</CodeAnalysisModuleSuppressionsFile>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
<OutputPath>bin\x64\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x64</PlatformTarget>
<CodeAnalysisUseTypeNameInSuppression>true</CodeAnalysisUseTypeNameInSuppression>
<CodeAnalysisModuleSuppressionsFile>GlobalSuppressions.cs</CodeAnalysisModuleSuppressionsFile>
<ErrorReport>prompt</ErrorReport>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<ItemGroup>
</ItemGroup>
<ItemGroup>
<Compile Include="Classes\LanguagePrebuilt.cs" />
<Compile Include="Classes\LemmatizerPrebuilt.cs" />
<Compile Include="Classes\LexiconPrebuilt.cs" />
</ItemGroup>
<ItemGroup>
<Folder Include="Data\" />
<Folder Include="Properties\" />
</ItemGroup>
<ItemGroup>
<BootstrapperPackage Include="Microsoft.Net.Framework.3.5.SP1">
<Visible>False</Visible>
<ProductName>.NET Framework 3.5 SP1</ProductName>
<Install>true</Install>
</BootstrapperPackage>
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\LemmaSharp\LemmaSharp.csproj">
<Project>{a39293c1-92d8-47b9-93a4-41f443b4f9e4}</Project>
<Name>LemmaSharp</Name>
</ProjectReference>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>

@ -1,111 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProductVersion>9.0.21022</ProductVersion>
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>{9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}</ProjectGuid>
<OutputType>Library</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>LemmaSharpPrebuiltCompact</RootNamespace>
<AssemblyName>LemmaSharpPrebuiltCompact</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<FileUpgradeFlags>
</FileUpgradeFlags>
<UpgradeBackupLocation>
</UpgradeBackupLocation>
<OldToolsVersion>3.5</OldToolsVersion>
<TargetFrameworkProfile />
<PublishUrl>publish\</PublishUrl>
<Install>true</Install>
<InstallFrom>Disk</InstallFrom>
<UpdateEnabled>false</UpdateEnabled>
<UpdateMode>Foreground</UpdateMode>
<UpdateInterval>7</UpdateInterval>
<UpdateIntervalUnits>Days</UpdateIntervalUnits>
<UpdatePeriodically>false</UpdatePeriodically>
<UpdateRequired>false</UpdateRequired>
<MapFileExtensions>true</MapFileExtensions>
<ApplicationRevision>0</ApplicationRevision>
<ApplicationVersion>1.0.0.%2a</ApplicationVersion>
<IsWebBootstrapper>false</IsWebBootstrapper>
<UseApplicationTrust>false</UseApplicationTrust>
<BootstrapperEnabled>true</BootstrapperEnabled>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<Prefer32Bit>false</Prefer32Bit>
</PropertyGroup>
<ItemGroup>
</ItemGroup>
<ItemGroup>
<Compile Include="Classes\LemmatizerPrebuiltCompact.cs" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\LemmaSharp\LemmaSharp.csproj">
<Project>{A39293C1-92D8-47B9-93A4-41F443B4F9E4}</Project>
<Name>LemmaSharp</Name>
</ProjectReference>
<ProjectReference Include="LemmaSharpPrebuilt.csproj">
<Project>{1E700D21-62D3-4525-93FE-C1FB0A1B0564}</Project>
<Name>LemmaSharpPrebuilt</Name>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<EmbeddedResource Include="Data\compact7z-mlteast-bg.lem" />
<EmbeddedResource Include="Data\compact7z-mlteast-cs.lem" />
<EmbeddedResource Include="Data\compact7z-mlteast-en.lem" />
<EmbeddedResource Include="Data\compact7z-mlteast-et.lem" />
<EmbeddedResource Include="Data\compact7z-mlteast-fa.lem" />
<EmbeddedResource Include="Data\compact7z-mlteast-fr.lem" />
<EmbeddedResource Include="Data\compact7z-mlteast-hu.lem" />
<EmbeddedResource Include="Data\compact7z-mlteast-mk.lem" />
<EmbeddedResource Include="Data\compact7z-mlteast-pl.lem" />
<EmbeddedResource Include="Data\compact7z-mlteast-ro.lem" />
<EmbeddedResource Include="Data\compact7z-mlteast-ru.lem" />
<EmbeddedResource Include="Data\compact7z-mlteast-sk.lem" />
<EmbeddedResource Include="Data\compact7z-mlteast-sl.lem" />
<EmbeddedResource Include="Data\compact7z-mlteast-sr.lem" />
<EmbeddedResource Include="Data\compact7z-mlteast-uk.lem" />
<EmbeddedResource Include="Data\compact7z-multext-en.lem" />
<EmbeddedResource Include="Data\compact7z-multext-fr.lem" />
<EmbeddedResource Include="Data\compact7z-multext-ge.lem" />
<EmbeddedResource Include="Data\compact7z-multext-it.lem" />
<EmbeddedResource Include="Data\compact7z-multext-sp.lem" />
</ItemGroup>
<ItemGroup>
<Folder Include="Properties\" />
</ItemGroup>
<ItemGroup>
<BootstrapperPackage Include="Microsoft.Net.Framework.3.5.SP1">
<Visible>False</Visible>
<ProductName>.NET Framework 3.5 SP1</ProductName>
<Install>true</Install>
</BootstrapperPackage>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>

@ -1,58 +0,0 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.25420.1
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharp", "LemmaSharp\LemmaSharp.csproj", "{A39293C1-92D8-47B9-93A4-41F443B4F9E4}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharpPrebuiltCompact", "LemmaSharpPrebuilt\LemmaSharpPrebuiltCompact.csproj", "{9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LemmaSharpPrebuilt", "LemmaSharpPrebuilt\LemmaSharpPrebuilt.csproj", "{1E700D21-62D3-4525-93FE-C1FB0A1B0564}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|Any CPU = Release|Any CPU
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x64.ActiveCfg = Debug|x64
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x64.Build.0 = Debug|x64
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x86.ActiveCfg = Debug|x86
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Debug|x86.Build.0 = Debug|x86
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|Any CPU.Build.0 = Release|Any CPU
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x64.ActiveCfg = Release|x64
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x64.Build.0 = Release|x64
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x86.ActiveCfg = Release|x86
{A39293C1-92D8-47B9-93A4-41F443B4F9E4}.Release|x86.Build.0 = Release|x86
{9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Debug|x64.ActiveCfg = Debug|Any CPU
{9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Debug|x86.ActiveCfg = Debug|Any CPU
{9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Release|Any CPU.ActiveCfg = Release|Any CPU
{9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Release|Any CPU.Build.0 = Release|Any CPU
{9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Release|x64.ActiveCfg = Release|Any CPU
{9BA3F2C4-5DAB-4D7B-B431-B072A0D8FC6A}.Release|x86.ActiveCfg = Release|Any CPU
{1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|Any CPU.Build.0 = Debug|Any CPU
{1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x64.ActiveCfg = Debug|x64
{1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x64.Build.0 = Debug|x64
{1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x86.ActiveCfg = Debug|x86
{1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Debug|x86.Build.0 = Debug|x86
{1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|Any CPU.ActiveCfg = Release|Any CPU
{1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|Any CPU.Build.0 = Release|Any CPU
{1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x64.ActiveCfg = Release|x64
{1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x64.Build.0 = Release|x64
{1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x86.ActiveCfg = Release|x86
{1E700D21-62D3-4525-93FE-C1FB0A1B0564}.Release|x86.Build.0 = Release|x86
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

@ -1,381 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.Serialization;
using System.Text;
namespace LemmaSharp
{
/// <summary>
/// Set of lemmatization examples (word, lemma, weight, MSD) together with the
/// <see cref="RuleList"/> that the examples' rules are created in.
/// Examples are stored by <see cref="LemmaExample.Signature"/>; adding an example whose
/// signature is already present merges it into the stored one. A sorted list view of the
/// examples is built lazily and discarded whenever the set changes.
/// </summary>
[Serializable]
public class ExampleList : ISerializable
{
    #region Private Variables
    // Number of malformed input lines after which AddMultextFile stops and throws.
    private const int MaxFormatErrors = 50;

    private LemmatizerSettings lsett;
    private RuleList rlRules;
    // Authoritative store: examples keyed by LemmaExample.Signature.
    private Dictionary<string, LemmaExample> dictExamples;
    // Sorted snapshot of dictExamples.Values; null means "has to be rebuilt".
    private List<LemmaExample> lstExamples;
    #endregion

    #region Constructor(s)
    /// <summary>
    /// Creates an empty example list with a new <see cref="RuleList"/> for the given settings.
    /// </summary>
    public ExampleList(LemmatizerSettings lsett) : base()
    {
        this.lsett = lsett;
        this.dictExamples = new Dictionary<string, LemmaExample>();
        this.lstExamples = null;
        this.rlRules = new RuleList(lsett);
    }

    /// <summary>
    /// Creates an example list and fills it from <paramref name="srIn"/>;
    /// see <see cref="AddMultextFile"/> for the meaning of <paramref name="sFormat"/>.
    /// </summary>
    public ExampleList(StreamReader srIn, string sFormat, LemmatizerSettings lsett) : this(lsett)
    {
        AddMultextFile(srIn, sFormat);
    }
    #endregion

    #region Public Properties & Indexers
    /// <summary>Example at position <paramref name="i"/> of the sorted list view.</summary>
    public LemmaExample this[int i]
    {
        get
        {
            return ListExamples[i];
        }
    }

    /// <summary>Number of distinct examples (after merging by signature).</summary>
    public int Count
    {
        get
        {
            return ListExamples.Count;
        }
    }

    /// <summary>Sum of the weights of all examples.</summary>
    public double WeightSum
    {
        get
        {
            double dWeight = 0;
            foreach (LemmaExample exm in ListExamples)
                dWeight += exm.Weight;
            return dWeight;
        }
    }

    /// <summary>Rule list shared by all examples of this list.</summary>
    public RuleList Rules
    {
        get
        {
            return rlRules;
        }
    }

    /// <summary>Sorted list view of the examples; built on first access after a change.</summary>
    public List<LemmaExample> ListExamples
    {
        get
        {
            if (lstExamples == null)
                FinalizeAdditions();
            return lstExamples;
        }
    }
    #endregion

    #region Essential Class Functions (adding/removing examples)
    /// <summary>
    /// Reads tab-separated examples from <paramref name="srIn"/> and adds them to the list.
    /// </summary>
    /// <param name="srIn">Reader positioned at the first example line.</param>
    /// <param name="sFormat">
    /// Column layout: the index of each letter is the index of the column it describes.
    /// 'W' = word and 'L' = lemma are required; 'M' = MSD and 'F' = weight are optional.
    /// </param>
    /// <exception cref="ArgumentException">The format contains no 'W' or no 'L'.</exception>
    /// <exception cref="InvalidDataException">
    /// 50 lines had fewer columns than the format requires.
    /// </exception>
    public void AddMultextFile(StreamReader srIn, string sFormat)
    {
        var iW = sFormat.IndexOf('W');
        var iL = sFormat.IndexOf('L');
        var iM = sFormat.IndexOf('M');
        var iF = sFormat.IndexOf('F');
        // Minimum number of columns a line must have to satisfy the format.
        var iLen = Math.Max(Math.Max(iW, iL), Math.Max(iM, iF)) + 1;
        if (iW < 0 || iL < 0)
        {
            throw new ArgumentException("Can not find word and lemma location in the format specification");
        }

        string sLine;
        int iError = 0;
        while ((sLine = srIn.ReadLine()) != null && iError < MaxFormatErrors)
        {
            string[] asWords = sLine.Split(new char[] { '\t' });
            if (asWords.Length < iLen)
            {
                // Line does not conform to the format: skip it, but keep count.
                iError++;
                continue;
            }

            var sWord = asWords[iW];
            var sLemma = asWords[iL];
            // "=" in the lemma column is shorthand for "lemma equals word".
            if (sLemma.Equals("=", StringComparison.Ordinal))
                sLemma = sWord;

            string sMsd = null;
            if (iM > -1)
                sMsd = asWords[iM];

            // Weight comes from the 'F' column; an absent or unparsable value keeps the default of 1.
            double dWeight = 1;
            if (iF > -1)
            {
                double dParsed;
                if (Double.TryParse(asWords[iF], out dParsed))
                    dWeight = dParsed;
            }

            AddExample(sWord, sLemma, dWeight, sMsd);
        }

        if (iError == MaxFormatErrors)
            throw new InvalidDataException("Parsing stopped because of too many (50) errors. Check format specification");
    }

    /// <summary>
    /// Creates an example and adds it to the list, merging it with an existing example
    /// that has the same signature.
    /// </summary>
    /// <returns>The stored example: the new one, or the existing one it was merged into.</returns>
    public LemmaExample AddExample(string sWord, string sLemma, double dWeight, string sMsd)
    {
        // When MSDs are ignored they are not stored at all.
        string sNewMsd = lsett.eMsdConsider != LemmatizerSettings.MsdConsideration.Ignore
            ? sMsd
            : null;
        var leNew = new LemmaExample(sWord, sLemma, dWeight, sNewMsd, rlRules, lsett);
        return Add(leNew);
    }

    // Inserts leNew, or joins it into the already stored example with the same signature.
    private LemmaExample Add(LemmaExample leNew)
    {
        LemmaExample leReturn = null;
        if (!dictExamples.TryGetValue(leNew.Signature, out leReturn))
        {
            leReturn = leNew;
            dictExamples.Add(leReturn.Signature, leReturn);
        }
        else
            leReturn.Join(leNew);

        // The sorted view is stale now.
        lstExamples = null;
        return leReturn;
    }

    /// <summary>Removes all examples (the rule list is left untouched).</summary>
    public void DropExamples()
    {
        dictExamples.Clear();
        lstExamples = null;
    }

    /// <summary>Builds the sorted list view if it does not exist yet.</summary>
    public void FinalizeAdditions()
    {
        if (lstExamples != null)
            return;
        lstExamples = new List<LemmaExample>(dictExamples.Values);
        lstExamples.Sort();
    }

    /// <summary>
    /// Builds a new example list from the front (<paramref name="front"/> = true) or rear
    /// word/lemma pairs of the examples in this list.
    /// </summary>
    public ExampleList GetFrontRearExampleList(bool front)
    {
        var elExamplesNew = new ExampleList(lsett);
        foreach (var le in this.ListExamples)
        {
            if (front)
                elExamplesNew.AddExample(le.WordFront, le.LemmaFront, le.Weight, le.Msd);
            else
                elExamplesNew.AddExample(le.WordRear, le.LemmaRear, le.Weight, le.Msd);
        }
        elExamplesNew.FinalizeAdditions();
        return elExamplesNew;
    }
    #endregion

    #region Output Functions (ToString)
    /// <summary>One line per example, in sorted order.</summary>
    public override string ToString()
    {
        var sb = new StringBuilder();
        // Go through ListExamples: the backing list is null after any addition.
        foreach (var exm in ListExamples)
        {
            sb.AppendLine(exm.ToString());
        }
        return sb.ToString();
    }
    #endregion

    #region Serialization Functions (.Net Default - ISerializable)
    /// <summary>
    /// Stores the settings and, as parallel arrays, word/lemma/weight/MSD of every example.
    /// </summary>
    public void GetObjectData(SerializationInfo info, StreamingContext context)
    {
        info.AddValue("lsett", lsett);
        info.AddValue("iNumExamples", dictExamples.Count);

        var aWords = new string[dictExamples.Count];
        var aLemmas = new string[dictExamples.Count];
        var aWeights = new double[dictExamples.Count];
        var aMsds = new string[dictExamples.Count];
        int iExm = 0;
        foreach (var exm in dictExamples.Values)
        {
            aWords[iExm] = exm.Word;
            aLemmas[iExm] = exm.Lemma;
            aWeights[iExm] = exm.Weight;
            aMsds[iExm] = exm.Msd;
            iExm++;
        }

        info.AddValue("aWords", aWords);
        info.AddValue("aLemmas", aLemmas);
        info.AddValue("aWeights", aWeights);
        info.AddValue("aMsds", aMsds);
    }

    /// <summary>
    /// Deserialization constructor: re-adds every stored example, which also rebuilds the rules.
    /// </summary>
    public ExampleList(SerializationInfo info, StreamingContext context)
    {
        lsett = (LemmatizerSettings)info.GetValue("lsett", typeof(LemmatizerSettings));
        this.dictExamples = new Dictionary<string, LemmaExample>();
        this.lstExamples = null;
        this.rlRules = new RuleList(lsett);

        var aWords = (string[])info.GetValue("aWords", typeof(string[]));
        var aLemmas = (string[])info.GetValue("aLemmas", typeof(string[]));
        var aWeights = (double[])info.GetValue("aWeights", typeof(double[]));
        var aMsds = (string[])info.GetValue("aMsds", typeof(string[]));
        for (int iExm = 0; iExm < aWords.Length; iExm++)
            AddExample(aWords[iExm], aLemmas[iExm], aWeights[iExm], aMsds[iExm]);
    }
    #endregion

    #region Serialization Functions (Binary)
    /// <summary>
    /// Writes this list to <paramref name="binWrt"/>.
    /// </summary>
    /// <param name="binWrt">Destination writer.</param>
    /// <param name="bSerializeExamples">When false, an empty example set is written.</param>
    /// <param name="bThisTopObject">When true, the settings are written as well.</param>
    public void Serialize(BinaryWriter binWrt, bool bSerializeExamples, bool bThisTopObject)
    {
        //save metadata
        binWrt.Write(bThisTopObject);

        //save reference types if needed -------------------------
        if (bThisTopObject)
            lsett.Serialize(binWrt);
        rlRules.Serialize(binWrt, false);

        if (!bSerializeExamples)
        {
            binWrt.Write(false); // reader will keep lstExamples == null
            binWrt.Write(0);     // no examples follow
            return;
        }

        // The flag tells the reader whether to recreate the list view; the items are
        // written from the list when it exists, otherwise from the dictionary.
        bool bHasList = lstExamples != null;
        ICollection<LemmaExample> colExamples;
        if (bHasList)
            colExamples = lstExamples;
        else
            colExamples = dictExamples.Values;

        binWrt.Write(bHasList);
        binWrt.Write(colExamples.Count);
        foreach (var le in colExamples)
        {
            // The rule signature lets the reader look the rule up in the rule list.
            binWrt.Write(le.Rule.Signature);
            le.Serialize(binWrt, false);
        }
    }

    /// <summary>
    /// Replaces the content of this list with data read from <paramref name="binRead"/>.
    /// </summary>
    /// <param name="binRead">Source reader, positioned where <see cref="Serialize"/> started writing.</param>
    /// <param name="lsett">Settings to use when the stream does not carry its own.</param>
    public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett)
    {
        //load metadata
        var bThisTopObject = binRead.ReadBoolean();

        //load reference types if needed -------------------------
        if (bThisTopObject)
            this.lsett = new LemmatizerSettings(binRead);
        else
            this.lsett = lsett;

        rlRules = new RuleList(binRead, this.lsett);

        var bCreateLstExamples = binRead.ReadBoolean();
        lstExamples = bCreateLstExamples ? new List<LemmaExample>() : null;
        dictExamples = new Dictionary<string, LemmaExample>();

        //load dictionary items
        var iCount = binRead.ReadInt32();
        for (var iId = 0; iId < iCount; iId++)
        {
            var lrRule = rlRules[binRead.ReadString()];
            var le = new LemmaExample(binRead, this.lsett, lrRule);
            dictExamples.Add(le.Signature, le);
            if (bCreateLstExamples)
                lstExamples.Add(le);
        }
    }

    /// <summary>Creates a list from data written by <see cref="Serialize"/>.</summary>
    public ExampleList(BinaryReader binRead, LemmatizerSettings lsett)
    {
        Deserialize(binRead, lsett);
    }
    #endregion

    #region Serialization Functions (Latino)
#if LATINO
    public void Save(Latino.BinarySerializer binWrt, bool bSerializeExamples, bool bThisTopObject)
    {
        //save metadata
        binWrt.WriteBool(bThisTopObject);

        //save reference types if needed -------------------------
        if (bThisTopObject)
            lsett.Save(binWrt);
        rlRules.Save(binWrt, false);

        if (!bSerializeExamples)
        {
            binWrt.WriteBool(false); // reader will keep lstExamples == null
            binWrt.WriteInt(0);      // no examples follow
            return;
        }

        bool bHasList = lstExamples != null;
        ICollection<LemmaExample> colExamples;
        if (bHasList)
            colExamples = lstExamples;
        else
            colExamples = dictExamples.Values;

        binWrt.WriteBool(bHasList);
        binWrt.WriteInt(colExamples.Count);
        foreach (LemmaExample le in colExamples)
        {
            binWrt.WriteString(le.Rule.Signature);
            le.Save(binWrt, false);
        }
    }

    public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett)
    {
        //load metadata
        bool bThisTopObject = binRead.ReadBool();

        //load reference types if needed -------------------------
        if (bThisTopObject)
            this.lsett = new LemmatizerSettings(binRead);
        else
            this.lsett = lsett;

        rlRules = new RuleList(binRead, this.lsett);

        bool bCreateLstExamples = binRead.ReadBool();
        lstExamples = bCreateLstExamples ? new List<LemmaExample>() : null;
        dictExamples = new Dictionary<string, LemmaExample>();

        //load dictionary items
        int iCount = binRead.ReadInt();
        for (int iId = 0; iId < iCount; iId++)
        {
            LemmaRule lrRule = rlRules[binRead.ReadString()];
            LemmaExample le = new LemmaExample(binRead, this.lsett, lrRule);
            dictExamples.Add(le.Signature, le);
            if (bCreateLstExamples)
                lstExamples.Add(le);
        }
    }

    public ExampleList(Latino.BinarySerializer binRead, LemmatizerSettings lsett)
    {
        Load(binRead, lsett);
    }
#endif
    #endregion
}
}

@ -1,481 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace LemmaSharp
{
/// <summary>
/// A single lemmatization example: a word, its lemma, an optional MSD tag, a weight and
/// the <see cref="LemmaRule"/> obtained for it from the rule list.
/// Examples are identified by <see cref="Signature"/>; examples with equal signatures are
/// merged with <see cref="Join"/>.
/// </summary>
public class LemmaExample : IComparable<LemmaExample>, IComparer<LemmaExample>
{
    #region Private Variables
    private string sWord;
    private string sLemma;
    private string sSignature;
    private string sMsd;
    private double dWeight;
    private LemmaRule lrRule;
    private LemmatizerSettings lsett;

    // Lazily computed derived strings; null means "not computed yet".
    private string sWordRearCache;
    private string sWordFrontCache;
    private string sLemmaFrontCache;
    #endregion

    #region Constructor(s)
    /// <summary>
    /// Creates an example and asks <paramref name="rlRules"/> for its rule.
    /// The signature contains the MSD only for <c>MsdConsideration.Distinct</c>
    /// (and for any value not handled explicitly).
    /// </summary>
    public LemmaExample(string sWord, string sLemma, double dWeight, string sMsd, RuleList rlRules, LemmatizerSettings lsett)
    {
        this.lsett = lsett;
        this.sWord = sWord;
        this.sLemma = sLemma;
        this.sMsd = sMsd;
        this.dWeight = dWeight;
        this.lrRule = rlRules.AddRule(this);

        switch (lsett.eMsdConsider)
        {
            case LemmatizerSettings.MsdConsideration.Ignore:
            case LemmatizerSettings.MsdConsideration.JoinAll:
            case LemmatizerSettings.MsdConsideration.JoinDistinct:
            case LemmatizerSettings.MsdConsideration.JoinSameSubstring:
                sSignature = string.Format("[{0}]==>[{1}]", sWord, sLemma);
                break;
            case LemmatizerSettings.MsdConsideration.Distinct:
            default:
                sSignature = string.Format("[{0}]==>[{1}]({2})", sWord, sLemma, sMsd ?? "");
                break;
        }

        this.sWordRearCache = null;
        this.sWordFrontCache = null;
        this.sLemmaFrontCache = null;
    }
    #endregion

    #region Public Properties
    public string Word
    {
        get
        {
            return sWord;
        }
    }

    public string Lemma
    {
        get
        {
            return sLemma;
        }
    }

    public string Msd
    {
        get
        {
            return sMsd;
        }
    }

    /// <summary>Key under which this example is stored; built in the constructor.</summary>
    public string Signature
    {
        get
        {
            return sSignature;
        }
    }

    public double Weight
    {
        get
        {
            return dWeight;
        }
    }

    public LemmaRule Rule
    {
        get
        {
            return lrRule;
        }
    }

    /// <summary>
    /// Word to be pre-lemmatized with Front-Lemmatizer into LemmaFront which is then lemmatized by standard Rear-Lemmatizer (Warning: it is reversed)
    /// </summary>
    public string WordFront
    {
        get
        {
            if (sWordFrontCache == null)
                sWordFrontCache = StringReverse(sWord);
            return sWordFrontCache;
        }
    }

    /// <summary>
    /// Lemma to be produced by pre-lemmatizing with Front-Lemmatizer (Warning: it is reversed)
    /// </summary>
    public string LemmaFront
    {
        get
        {
            if (sLemmaFrontCache == null)
                sLemmaFrontCache = StringReverse(WordRear);
            return sLemmaFrontCache;
        }
    }

    /// <summary>
    /// Word to be lemmatized by standard Rear-Lemmatizer (its beginning has already been modified by Front-Lemmatizer)
    /// </summary>
    public string WordRear
    {
        get
        {
            if (sWordRearCache == null)
            {
                int lemmaPos = 0, wordPos = 0;
                var common = LongestCommonSubstring(sWord, sLemma, ref wordPos, ref lemmaPos);
                // No common substring: use the lemma itself. Otherwise take the lemma up to
                // the end of the common part and continue with the word's tail after it.
                sWordRearCache = lemmaPos == -1
                    ? sLemma
                    : (sLemma.Substring(0, lemmaPos + common.Length) + sWord.Substring(wordPos + common.Length));
            }
            return sWordRearCache;
        }
    }

    /// <summary>
    /// Lemma to be produced by standard Rear-Lemmatizer from WordRear
    /// </summary>
    public string LemmaRear
    {
        get
        {
            return sLemma;
        }
    }
    #endregion

    #region Essential Class Functions (joining two examples into one)
    //TODO - this function is not totally ok because sMsd should not be
    //changed since it could be included in signature
    /// <summary>
    /// Merges <paramref name="leJoin"/> into this example: weights are added and the MSD is
    /// combined according to the configured <c>MsdConsideration</c>. Nothing is done to the
    /// MSD when this example has none.
    /// </summary>
    public void Join(LemmaExample leJoin)
    {
        dWeight += leJoin.dWeight;

        if (sMsd == null)
            return;

        switch (lsett.eMsdConsider)
        {
            case LemmatizerSettings.MsdConsideration.Ignore:
                sMsd = null;
                break;
            case LemmatizerSettings.MsdConsideration.Distinct:
                break;
            case LemmatizerSettings.MsdConsideration.JoinAll:
                sMsd += "|" + leJoin.sMsd;
                break;
            case LemmatizerSettings.MsdConsideration.JoinDistinct:
                // Compare whole '|'-delimited entries. Wrapping both sides in delimiters
                // also covers the first entry (which has no leading '|') and prevents a
                // tag from being hidden by a longer tag that merely starts with it.
                var sEntry = "|" + leJoin.sMsd + "|";
                if (("|" + sMsd + "|").IndexOf(sEntry, StringComparison.Ordinal) < 0)
                {
                    sMsd += "|" + leJoin.sMsd;
                }
                break;
            case LemmatizerSettings.MsdConsideration.JoinSameSubstring:
                // Keep the common prefix of both MSDs; a missing MSD shares nothing.
                var sOtherMsd = leJoin.sMsd ?? string.Empty;
                int iPos = 0;
                var iMax = Math.Min(sMsd.Length, sOtherMsd.Length);
                while (iPos < iMax && sMsd[iPos] == sOtherMsd[iPos])
                    iPos++;
                sMsd = sMsd.Substring(0, iPos);
                break;
            default:
                break;
        }
    }
    #endregion

    #region Essential Class Functions (calculating similarities between examples)
    /// <summary>Length of the common suffix of this example's word and <paramref name="le"/>'s word.</summary>
    public int Similarity(LemmaExample le)
    {
        return Similarity(this, le);
    }

    /// <summary>Length of the common suffix of the two examples' words.</summary>
    public static int Similarity(LemmaExample le1, LemmaExample le2)
    {
        var sWord1 = le1.sWord;
        var sWord2 = le2.sWord;
        var iLen1 = sWord1.Length;
        var iLen2 = sWord2.Length;
        var iMaxLen = Math.Min(iLen1, iLen2);

        for (var iPos = 1; iPos <= iMaxLen; iPos++)
        {
            if (sWord1[iLen1 - iPos] != sWord2[iLen2 - iPos])
                return iPos - 1;
        }

        //TODO similarity should be bigger if two words are totally equal
        return iMaxLen;
    }
    #endregion

    #region Essential Class Functions (comparing examples - eg.: for sorting)
    /// <summary>
    /// Compares this example against <paramref name="other"/>; mainly used for sorting.
    /// Words are compared from their last character backwards, then lemmas forwards, then
    /// (only for <c>MsdConsideration.Distinct</c> and when both are present) MSDs forwards.
    /// </summary>
    /// <param name="other">Example to compare against.</param>
    /// <returns>1 if this example is bigger, -1 if smaller and 0 if both are the same.
    /// Any example is bigger than null.</returns>
    public int CompareTo(LemmaExample other)
    {
        if (other == null)
            return 1;

        var iComparison = CompareStrings(this.sWord, other.sWord, false);
        if (iComparison != 0)
            return iComparison;

        iComparison = CompareStrings(this.sLemma, other.sLemma, true);
        if (iComparison != 0)
            return iComparison;

        if (lsett.eMsdConsider == LemmatizerSettings.MsdConsideration.Distinct &&
            this.sMsd != null && other.sMsd != null)
        {
            iComparison = CompareStrings(this.sMsd, other.sMsd, true);
            if (iComparison != 0)
                return iComparison;
        }

        return 0;
    }

    public int Compare(LemmaExample x, LemmaExample y)
    {
        return x.CompareTo(y);
    }

    /// <summary>
    /// Character-by-character comparison, from the start (<paramref name="bForward"/> = true)
    /// or from the end of both strings. When one string is exhausted, the longer one is bigger.
    /// </summary>
    /// <returns>1, -1 or 0.</returns>
    public static int CompareStrings(string sStr1, string sStr2, bool bForward)
    {
        var iLen1 = sStr1.Length;
        var iLen2 = sStr2.Length;
        var iMaxLen = Math.Min(iLen1, iLen2);

        if (bForward)
        {
            for (int iPos = 0; iPos < iMaxLen; iPos++)
            {
                if (sStr1[iPos] > sStr2[iPos])
                    return 1;
                if (sStr1[iPos] < sStr2[iPos])
                    return -1;
            }
        }
        else
        {
            for (int iPos = 1; iPos <= iMaxLen; iPos++)
            {
                if (sStr1[iLen1 - iPos] > sStr2[iLen2 - iPos])
                    return 1;
                if (sStr1[iLen1 - iPos] < sStr2[iLen2 - iPos])
                    return -1;
            }
        }

        if (iLen1 > iLen2)
            return 1;
        if (iLen1 < iLen2)
            return -1;
        return 0;
    }

    /// <summary>Length of the common prefix of the two strings.</summary>
    public static int EqualPrifixLen(string sStr1, string sStr2)
    {
        var iLen1 = sStr1.Length;
        var iLen2 = sStr2.Length;
        var iMaxLen = Math.Min(iLen1, iLen2);

        for (var iPos = 0; iPos < iMaxLen; iPos++)
        {
            if (sStr1[iPos] != sStr2[iPos])
                return iPos;
        }
        return iMaxLen;
    }

    /// <summary>
    /// Finds the longest substring occurring in both strings (the first one found wins ties).
    /// </summary>
    /// <param name="sStr1">First string.</param>
    /// <param name="sStr2">Second string.</param>
    /// <param name="iPosInStr1">Receives the start of the substring in <paramref name="sStr1"/>, or -1 if there is none.</param>
    /// <param name="iPosInStr2">Receives the start of the substring in <paramref name="sStr2"/>, or -1 if there is none.</param>
    /// <returns>The common substring, or an empty string.</returns>
    public static string LongestCommonSubstring(string sStr1, string sStr2, ref int iPosInStr1, ref int iPosInStr2)
    {
        // l[i, j] = length of the common run ending at sStr1[i] and sStr2[j].
        var l = new int[sStr1.Length, sStr2.Length];
        int z = 0;
        iPosInStr1 = -1;
        iPosInStr2 = -1;

        for (var i = 0; i < sStr1.Length; i++)
        {
            for (var j = 0; j < sStr2.Length; j++)
            {
                if (sStr1[i] != sStr2[j])
                    continue;

                l[i, j] = (i == 0 || j == 0) ? 1 : l[i - 1, j - 1] + 1;
                if (l[i, j] > z)
                {
                    z = l[i, j];
                    iPosInStr1 = i - z + 1;
                    iPosInStr2 = j - z + 1;
                }
            }
        }

        // Extract the winner once instead of on every improvement.
        return z > 0 ? sStr1.Substring(iPosInStr1, z) : "";
    }

    /// <summary>Returns the characters of <paramref name="s"/> in reverse order; null stays null.</summary>
    public static string StringReverse(string s)
    {
        if (s == null) return null;
        char[] charArray = s.ToCharArray();
        Array.Reverse(charArray);
        return new string(charArray);
    }
    #endregion

    #region Output Functions (ToString)
    public override string ToString()
    {
        var sb = new StringBuilder();
        if (sWord != null)
            sb.AppendFormat("W:\"{0}\" ", sWord);
        if (sLemma != null)
            sb.AppendFormat("L:\"{0}\" ", sLemma);
        if (sMsd != null)
            sb.AppendFormat("M:\"{0}\" ", sMsd);
        if (false == Double.IsNaN(dWeight))
            sb.AppendFormat("F:\"{0}\" ", dWeight);
        if (lrRule != null)
            sb.AppendFormat("R:{0} ", lrRule);

        // Drop the trailing separator space.
        if (sb.Length > 0)
            return sb.ToString(0, sb.Length - 1);
        return string.Empty;
    }
    #endregion

    #region Serialization Functions (Binary)
    /// <summary>
    /// Writes this example to <paramref name="binWrt"/>. Settings and rule are written
    /// only when <paramref name="bThisTopObject"/> is true.
    /// </summary>
    public void Serialize(BinaryWriter binWrt, bool bThisTopObject)
    {
        //save metadata
        binWrt.Write(bThisTopObject);

        //save value types --------------------------------------
        binWrt.Write(sWord);
        binWrt.Write(sLemma);
        binWrt.Write(sSignature);
        // The MSD is optional: a presence flag precedes it.
        if (sMsd == null)
        {
            binWrt.Write(false);
        }
        else
        {
            binWrt.Write(true);
            binWrt.Write(sMsd);
        }
        binWrt.Write(dWeight);

        //save reference types if needed -------------------------
        if (bThisTopObject)
        {
            lsett.Serialize(binWrt);
            lrRule.Serialize(binWrt, false);
        }
    }

    /// <summary>
    /// Reads the state written by <see cref="Serialize"/>. When the stream does not carry
    /// its own settings and rule, <paramref name="lsett"/> and <paramref name="lrRule"/> are used.
    /// </summary>
    public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, LemmaRule lrRule)
    {
        //load metadata
        var bThisTopObject = binRead.ReadBoolean();

        //load value types --------------------------------------
        sWord = binRead.ReadString();
        sLemma = binRead.ReadString();
        sSignature = binRead.ReadString();
        if (binRead.ReadBoolean())
            sMsd = binRead.ReadString();
        else
            sMsd = null;
        dWeight = binRead.ReadDouble();

        //load reference types if needed -------------------------
        if (bThisTopObject)
        {
            this.lsett = new LemmatizerSettings(binRead);
            this.lrRule = new LemmaRule(binRead, this.lsett);
        }
        else
        {
            this.lsett = lsett;
            this.lrRule = lrRule;
        }

        // Derived strings belong to the previous word/lemma.
        this.sWordRearCache = null;
        this.sWordFrontCache = null;
        this.sLemmaFrontCache = null;
    }

    public LemmaExample(BinaryReader binRead, LemmatizerSettings lsett, LemmaRule lrRule)
    {
        Deserialize(binRead, lsett, lrRule);
    }
    #endregion

    #region Serialization Functions (Latino)
#if LATINO
    public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject)
    {
        //save metadata
        binWrt.WriteBool(bThisTopObject);

        //save value types --------------------------------------
        binWrt.WriteString(sWord);
        binWrt.WriteString(sLemma);
        binWrt.WriteString(sSignature);
        if (sMsd == null)
            binWrt.WriteBool(false);
        else
        {
            binWrt.WriteBool(true);
            binWrt.WriteString(sMsd);
        }
        binWrt.WriteDouble(dWeight);

        //save reference types if needed -------------------------
        if (bThisTopObject)
        {
            lsett.Save(binWrt);
            lrRule.Save(binWrt, false);
        }
    }

    public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett, LemmaRule lrRule)
    {
        //load metadata
        bool bThisTopObject = binRead.ReadBool();

        //load value types --------------------------------------
        sWord = binRead.ReadString();
        sLemma = binRead.ReadString();
        sSignature = binRead.ReadString();
        if (binRead.ReadBool())
            sMsd = binRead.ReadString();
        else
            sMsd = null;
        dWeight = binRead.ReadDouble();

        //load reference types if needed -------------------------
        if (bThisTopObject)
        {
            this.lsett = new LemmatizerSettings(binRead);
            this.lrRule = new LemmaRule(binRead, this.lsett);
        }
        else
        {
            this.lsett = lsett;
            this.lrRule = lrRule;
        }

        // Same as Deserialize: derived strings belong to the previous word/lemma.
        this.sWordRearCache = null;
        this.sWordFrontCache = null;
        this.sLemmaFrontCache = null;
    }

    public LemmaExample(Latino.BinarySerializer binRead, LemmatizerSettings lsett, LemmaRule lrRule)
    {
        Load(binRead, lsett, lrRule);
    }
#endif
    #endregion
}
}

@ -1,189 +0,0 @@
using System;
using System.IO;
namespace LemmaSharp
{
/// <summary>
/// Suffix transformation derived from one word/lemma pair: the part of the word after the
/// common prefix is dropped and the part of the lemma after the common prefix is appended.
/// </summary>
public class LemmaRule
{
    #region Private Variables
    private int iId;
    // Number of trailing characters removed from a word.
    private int iFrom;
    // The removed suffix itself; only kept when the settings ask for it, otherwise null.
    private string sFrom;
    // Suffix appended after the removal.
    private string sTo;
    private string sSignature;
    private LemmatizerSettings lsett;
    #endregion

    #region Constructor(s)
    /// <summary>
    /// Derives the rule that turns <paramref name="sWord"/> into <paramref name="sLemma"/>.
    /// The signature names the removed suffix when <c>lsett.bUseFromInRules</c> is set and
    /// only its length otherwise.
    /// </summary>
    public LemmaRule(string sWord, string sLemma, int iId, LemmatizerSettings lsett)
    {
        this.lsett = lsett;
        this.iId = iId;

        int iStemLength = SameStem(sWord, sLemma);
        this.sTo = sLemma.Substring(iStemLength);
        this.iFrom = sWord.Length - iStemLength;

        if (!lsett.bUseFromInRules)
        {
            this.sFrom = null;
            this.sSignature = string.Format("[#{0}]==>[{1}]", this.iFrom, this.sTo);
            return;
        }

        this.sFrom = sWord.Substring(iStemLength);
        this.sSignature = string.Format("[{0}]==>[{1}]", this.sFrom, this.sTo);
    }
    #endregion

    #region Public Properties
    /// <summary>Textual form of the transformation, built in the constructor.</summary>
    public string Signature
    {
        get { return sSignature; }
    }

    /// <summary>Identifier passed to the constructor (or read from the stream).</summary>
    public int Id
    {
        get { return iId; }
    }
    #endregion

    #region Essential Class Functions
    // Length of the common prefix of the two strings.
    private static int SameStem(string sStr1, string sStr2)
    {
        int iLimit = Math.Min(sStr1.Length, sStr2.Length);
        int iCommon = 0;
        while (iCommon < iLimit && sStr1[iCommon] == sStr2[iCommon])
        {
            iCommon++;
        }
        return iCommon;
    }

    /// <summary>
    /// True when a group condition of <paramref name="iGroupCondLen"/> characters is at
    /// least as long as the suffix this rule removes.
    /// </summary>
    public bool IsApplicableToGroup(int iGroupCondLen)
    {
        return iFrom <= iGroupCondLen;
    }

    /// <summary>
    /// Applies the rule: removes the last iFrom characters of <paramref name="sWord"/> and
    /// appends the replacement suffix.
    /// </summary>
    public string Lemmatize(string sWord)
    {
        int iKeep = sWord.Length - iFrom;
        string sStem = sWord.Substring(0, iKeep);
        return sStem + sTo;
    }
    #endregion

    #region Output Functions (ToString)
    /// <summary>Id and signature separated by a colon.</summary>
    public override string ToString()
    {
        return string.Format("{0}:{1}", iId, sSignature);
    }
    #endregion

    #region Serialization Functions (Binary)
    /// <summary>
    /// Writes the rule to <paramref name="binWrt"/>; the settings follow only when
    /// <paramref name="bThisTopObject"/> is true.
    /// </summary>
    public void Serialize(BinaryWriter binWrt, bool bThisTopObject)
    {
        // metadata
        binWrt.Write(bThisTopObject);

        // value fields; sFrom is optional and preceded by a presence flag
        binWrt.Write(iId);
        binWrt.Write(iFrom);
        bool bHasFrom = sFrom != null;
        binWrt.Write(bHasFrom);
        if (bHasFrom)
        {
            binWrt.Write(sFrom);
        }
        binWrt.Write(sTo);
        binWrt.Write(sSignature);

        // settings, only for a top-level object
        if (bThisTopObject)
        {
            lsett.Serialize(binWrt);
        }
    }

    /// <summary>
    /// Reads the state written by <see cref="Serialize"/>; <paramref name="lsett"/> is used
    /// when the stream does not carry its own settings.
    /// </summary>
    public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett)
    {
        // metadata
        bool bTopObject = binRead.ReadBoolean();

        // value fields
        this.iId = binRead.ReadInt32();
        this.iFrom = binRead.ReadInt32();
        bool bHasFrom = binRead.ReadBoolean();
        this.sFrom = bHasFrom ? binRead.ReadString() : null;
        this.sTo = binRead.ReadString();
        this.sSignature = binRead.ReadString();

        // settings: own copy for a top-level object, shared instance otherwise
        this.lsett = bTopObject ? new LemmatizerSettings(binRead) : lsett;
    }

    public LemmaRule(System.IO.BinaryReader binRead, LemmatizerSettings lsett)
    {
        Deserialize(binRead, lsett);
    }
    #endregion

    #region Serialization Functions (Latino)
#if LATINO
    public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject)
    {
        // metadata
        binWrt.WriteBool(bThisTopObject);

        // value fields
        binWrt.WriteInt(iId);
        binWrt.WriteInt(iFrom);
        bool bHasFrom = sFrom != null;
        binWrt.WriteBool(bHasFrom);
        if (bHasFrom)
        {
            binWrt.WriteString(sFrom);
        }
        binWrt.WriteString(sTo);
        binWrt.WriteString(sSignature);

        if (bThisTopObject)
        {
            lsett.Save(binWrt);
        }
    }

    public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett)
    {
        // metadata
        bool bTopObject = binRead.ReadBool();

        // value fields
        this.iId = binRead.ReadInt();
        this.iFrom = binRead.ReadInt();
        bool bHasFrom = binRead.ReadBool();
        this.sFrom = bHasFrom ? binRead.ReadString() : null;
        this.sTo = binRead.ReadString();
        this.sSignature = binRead.ReadString();

        // settings: own copy for a top-level object, shared instance otherwise
        this.lsett = bTopObject ? new LemmatizerSettings(binRead) : lsett;
    }

    public LemmaRule(Latino.BinarySerializer binRead, LemmatizerSettings lsett)
    {
        Load(binRead, lsett);
    }
#endif
    #endregion
}
}

@ -1,478 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace LemmaSharp
{
[Serializable]
public class LemmaTreeNode : ILemmatizerModel
{
#region Private Variables
//settings
private LemmatizerSettings lsett;
//tree structure references
private Dictionary<char, LemmaTreeNode> dictSubNodes;
private LemmaTreeNode ltnParentNode;
//essential node properties
private int iSimilarity; //similarity among all words in this node
private string sCondition; //suffix that must match in order to lemmatize
private bool bWholeWord; //true if condition has to match to whole word
//rules and weights;
private LemmaRule lrBestRule; //the best rule to be applied when lemmatizing
private RuleWeighted[] aBestRules; //list of best rules
private double dWeight;
//source of this node
private int iStart;
private int iEnd;
private ExampleList elExamples;
#endregion
#region Constructor(s) & Destructor(s)
/// <summary>
/// Base constructor: stores the settings only; every other field keeps its default value.
/// </summary>
private LemmaTreeNode(LemmatizerSettings lsett)
{
this.lsett = lsett;
}
/// <summary>
/// Builds a node without a parent that covers all examples of
/// <paramref name="elExamples"/> (indices 0 to Count - 1).
/// </summary>
/// <param name="lsett">Settings used by this node and the nodes it creates.</param>
/// <param name="elExamples">Examples the node is built from.</param>
public LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples)
: this(lsett, elExamples, 0, elExamples.Count - 1, null)
{
}
/// <summary>
/// Builds the node for the examples with indices <paramref name="iStart"/> to
/// <paramref name="iEnd"/>: derives the node's condition and similarity, selects its best
/// rules, creates the sub-nodes and finally replaces every child that has exactly one
/// sub-node and the same best rule as this node by that single grandchild.
/// An invalid index range yields a node that only carries the default rule with weight 0.
/// </summary>
/// <param name="lsett">Settings used by this node and the nodes it creates.</param>
/// <param name="elExamples">Examples the node is built from.</param>
/// <param name="iStart">Index of the first word of the current group</param>
/// <param name="iEnd">Index of the last word of the current group</param>
/// <param name="ltnParentNode">Parent node, or null for a node without parent.</param>
private LemmaTreeNode(LemmatizerSettings lsett, ExampleList elExamples, int iStart, int iEnd, LemmaTreeNode ltnParentNode) : this(lsett)
{
    this.ltnParentNode = ltnParentNode;
    this.dictSubNodes = null;
    this.iStart = iStart;
    this.iEnd = iEnd;
    this.elExamples = elExamples;

    // Nothing to learn from an empty or out-of-range group.
    bool bInvalidRange = iStart >= elExamples.Count || iEnd >= elExamples.Count || iStart > iEnd;
    if (bInvalidRange)
    {
        lrBestRule = elExamples.Rules.DefaultRule;
        aBestRules = new RuleWeighted[] { new RuleWeighted(lrBestRule, 0) };
        dWeight = 0;
        return;
    }

    // The condition is the ending of the first word, one character longer than the
    // parent's similarity (empty without a parent), capped at the word's length.
    string sFirstWord = elExamples[iStart].Word;
    int iWanted = 0;
    if (ltnParentNode != null)
    {
        iWanted = ltnParentNode.iSimilarity + 1;
    }
    int iConditionLength = Math.Min(iWanted, sFirstWord.Length);
    this.sCondition = sFirstWord.Substring(sFirstWord.Length - iConditionLength);

    this.iSimilarity = elExamples[iStart].Similarity(elExamples[iEnd]);

    if (ltnParentNode == null)
    {
        this.bWholeWord = false;
    }
    else
    {
        this.bWholeWord = elExamples[iEnd].Word.Length == ltnParentNode.iSimilarity;
    }

    FindBestRules();
    AddSubAll();

    //TODO check this heuristics, can be problematic when there are more applicable rules
    if (dictSubNodes == null)
    {
        return;
    }

    // Collect the replacements first; the dictionary must not change while enumerated.
    var lReplaceNodes = new List<KeyValuePair<char, LemmaTreeNode>>();
    foreach (var kvpChild in dictSubNodes)
    {
        LemmaTreeNode ltnChild = kvpChild.Value;
        if (ltnChild.dictSubNodes == null || ltnChild.dictSubNodes.Count != 1)
        {
            continue;
        }
        if (ltnChild.lrBestRule != lrBestRule)
        {
            continue;
        }
        foreach (var ltnGrandChild in ltnChild.dictSubNodes.Values)
        {
            lReplaceNodes.Add(new KeyValuePair<char, LemmaTreeNode>(kvpChild.Key, ltnGrandChild));
            break; // there is exactly one
        }
    }

    foreach (var kvpReplacement in lReplaceNodes)
    {
        dictSubNodes[kvpReplacement.Key] = kvpReplacement.Value;
        kvpReplacement.Value.ltnParentNode = this;
    }
}
#endregion
#region Public Properties
/// <summary>
/// Number of nodes in the subtree rooted at this node, the node itself included.
/// </summary>
public int TreeSize
{
    get
    {
        // A node without sub-nodes counts just itself.
        if (dictSubNodes == null)
        {
            return 1;
        }

        int iTotal = 1;
        foreach (var kvpChild in dictSubNodes)
        {
            iTotal += kvpChild.Value.TreeSize;
        }
        return iTotal;
    }
}
/// <summary>
/// Weight stored for this node.
/// </summary>
public double Weight
{
    get { return dWeight; }
}
#endregion
#region Essential Class Functions (building model)
/// <summary>
/// Computes the node weight (sum of the weights of the examples iStart..iEnd) and ranks
/// the rules of those examples that are applicable to the node's condition.
/// If no rule is applicable, the condition is lengthened one character at a time up to
/// the node's similarity; after that the parent's best rule (or the default rule for a
/// node without parent) is used with weight 0.
/// Sets dWeight, aBestRules and lrBestRule, and may lengthen sCondition.
/// </summary>
private void FindBestRules()
{
    // NOTE: a LINQ (GroupBy/OrderBy) formulation of the aggregation below was tried and
    // turned out slower than the explicit loops.

    //calculate dWeight of whole node and calculate qualities for all rules
    var dictApplicableRules = new Dictionary<LemmaRule, double>();
    while (dictApplicableRules.Count == 0)
    {
        // Restart the sum on every pass: the loop is repeated after the condition has
        // been lengthened, and the examples must not be counted more than once.
        dWeight = 0;
        for (var iExm = iStart; iExm <= iEnd; iExm++)
        {
            var lr = elExamples[iExm].Rule;
            var dExmWeight = elExamples[iExm].Weight;
            dWeight += dExmWeight;

            if (lr.IsApplicableToGroup(sCondition.Length))
            {
                // A rule not seen yet starts at 0 (TryGetValue leaves the default).
                double dRuleWeight;
                dictApplicableRules.TryGetValue(lr, out dRuleWeight);
                dictApplicableRules[lr] = dRuleWeight + dExmWeight;
            }
        }

        //if none found then increase condition length or add some default applicable rule
        if (dictApplicableRules.Count == 0)
        {
            if (this.sCondition.Length < iSimilarity)
            {
                this.sCondition = elExamples[iStart].Word.Substring(elExamples[iStart].Word.Length - (sCondition.Length + 1));
            }
            else
            {
                //TODO check this heuristic, maybe it is better to add the default rule instead of the parent's rule
                // A node without parent has no parent rule to inherit: use the default rule.
                var lrFallback = ltnParentNode != null
                    ? ltnParentNode.lrBestRule
                    : elExamples.Rules.DefaultRule;
                dictApplicableRules.Add(lrFallback, 0);
            }
        }
    }

    //TODO can optimize this step using sorted list (dont add if it's worse than the worst)
    // Each rule is weighted by its share of the node weight.
    var lSortedRules = new List<RuleWeighted>();
    foreach (var kvp in dictApplicableRules)
    {
        lSortedRules.Add(new RuleWeighted(kvp.Key, kvp.Value / dWeight));
    }
    lSortedRules.Sort();

    //keep just best iMaxRulesPerNode rules (a non-positive setting keeps all of them)
    var iNumRules = lSortedRules.Count;
    if (lsett.iMaxRulesPerNode > 0)
        iNumRules = Math.Min(lSortedRules.Count, lsett.iMaxRulesPerNode);

    aBestRules = new RuleWeighted[iNumRules];
    for (var iRule = 0; iRule < iNumRules; iRule++)
    {
        aBestRules[iRule] = lSortedRules[iRule];
    }

    //set best rule
    lrBestRule = aBestRules[0].Rule;

    //TODO must check if this heuristic is OK (to privilege parent rule)
    // Among the rules tied with the top-ranked one, prefer the parent's best rule.
    if (ltnParentNode != null)
    {
        for (int iRule = 0; iRule < lSortedRules.Count &&
            lSortedRules[iRule].Weight == lSortedRules[0].Weight; iRule++)
        {
            if (lSortedRules[iRule].Rule == ltnParentNode.lrBestRule)
            {
                lrBestRule = lSortedRules[iRule].Rule;
                break;
            }
        }
    }
}
/// <summary>
/// Walks the examples in [iStart, iEnd], groups consecutive examples by the character located
/// <c>iSimilarity</c> positions before the last character of the word, and calls AddSub for every
/// group that contains at least one example whose rule differs from <c>lrBestRule</c>.
/// </summary>
private void AddSubAll()
{
    var iRunStart = iStart;
    var chLast = '\0';
    var bRunHasOtherRule = false;

    var iPos = iStart;
    while (iPos <= iEnd)
    {
        var sCurrent = elExamples[iPos].Word;

        // '\0' stands for "the word is too short to have a character at that position"
        char chCurrent;
        if (sCurrent.Length > iSimilarity)
            chCurrent = sCurrent[sCurrent.Length - 1 - iSimilarity];
        else
            chCurrent = '\0';

        // the grouping character changed: close the previous run
        var bRunEnded = iPos != iStart && chLast != chCurrent;
        if (bRunEnded)
        {
            if (bRunHasOtherRule)
            {
                AddSub(iRunStart, iPos - 1, chLast);
                bRunHasOtherRule = false;
            }
            iRunStart = iPos;
        }

        //TODO check out bRunHasOtherRule when there are multiple possible rules (not just lrBestRule)
        if (elExamples[iPos].Rule != lrBestRule)
        {
            bRunHasOtherRule = true;
        }

        chLast = chCurrent;
        iPos++;
    }

    // the trailing run is skipped when it spans the whole range of this node
    if (iRunStart != iStart && bRunHasOtherRule)
    {
        AddSub(iRunStart, iEnd, chLast);
    }
}
/// <summary>
/// Builds a child node for the examples in [iStart, iEnd] and registers it under
/// <paramref name="chChar"/>, unless the child is redundant.
/// </summary>
/// <param name="iStart">Index of the first example of the child.</param>
/// <param name="iEnd">Index of the last example of the child (inclusive).</param>
/// <param name="chChar">Key under which the child is stored in <c>dictSubNodes</c>.</param>
private void AddSub(int iStart, int iEnd, char chChar)
{
    var ltnChild = new LemmaTreeNode(lsett, elExamples, iStart, iEnd, this);

    //TODO - maybe not really appropriate because statistics from multiple possible rules are lost
    // a childless node that proposes the same best rule as this node is not stored
    var bRedundant = ltnChild.lrBestRule == lrBestRule && ltnChild.dictSubNodes == null;
    if (!bRedundant)
    {
        if (dictSubNodes == null)
        {
            dictSubNodes = new Dictionary<char, LemmaTreeNode>();
        }
        dictSubNodes.Add(chChar, ltnChild);
    }
}
#endregion
#region Essential Class Functions (running model = lemmatizing)
/// <summary>
/// Tests whether <paramref name="sWord"/> matches the leading part of this node's condition.
/// </summary>
/// <remarks>
/// The word is aligned so that its end coincides with the end of <c>sCondition</c>. For a node with
/// a parent, only the first <c>sCondition.Length - parent.sCondition.Length - 1</c> characters of the
/// condition are compared; presumably the remaining trailing characters were already matched by the
/// parent's condition and by the dictionary key used to select this node (verify against callers).
/// A node without a parent compares the whole condition instead of throwing.
/// </remarks>
/// <param name="sWord">Word to test.</param>
/// <returns>
/// False when the word is shorter than the condition, when <c>bWholeWord</c> is set and the lengths
/// differ, or when a compared character differs; true otherwise.
/// </returns>
public bool ConditionSatisfied(string sWord)
{
    var iDiff = sWord.Length - sCondition.Length;
    if (iDiff < 0 || (bWholeWord && iDiff > 0))
        return false;

    // Without a parent nothing has been pre-matched, so every condition character is checked.
    var iWrdEnd = ltnParentNode == null
        ? sCondition.Length
        : sCondition.Length - ltnParentNode.sCondition.Length - 1;

    for (var iChar = 0; iChar < iWrdEnd; iChar++)
    {
        if (sCondition[iChar] != sWord[iChar + iDiff])
            return false;
    }
    return true;
}
/// <summary>
/// Lemmatizes <paramref name="sWord"/> with the most specific node of this subtree that accepts it.
/// </summary>
/// <remarks>
/// The child is selected by the character located <c>iSimilarity</c> positions before the last
/// character of the word ('\0' when the word has exactly <c>iSimilarity</c> characters). When no
/// child exists for that character, or the child's condition is not satisfied, this node's
/// <c>lrBestRule</c> is applied.
/// </remarks>
/// <param name="sWord">Word to lemmatize.</param>
/// <returns>The result of applying the selected rule to the word.</returns>
public string Lemmatize(string sWord)
{
    if (sWord.Length >= iSimilarity && dictSubNodes != null)
    {
        var chChar = sWord.Length > iSimilarity ? sWord[sWord.Length - 1 - iSimilarity] : '\0';

        // one dictionary lookup instead of ContainsKey followed by two indexer reads
        LemmaTreeNode ltnSub;
        if (dictSubNodes.TryGetValue(chChar, out ltnSub) && ltnSub.ConditionSatisfied(sWord))
            return ltnSub.Lemmatize(sWord);
    }
    return lrBestRule.Lemmatize(sWord);
}
#endregion
#region Output Functions (ToString)
/// <summary>
/// Renders this node and all of its descendants, one node per line, starting at indentation level 0.
/// </summary>
public override string ToString()
{
    var sbTree = new StringBuilder();
    ToString(sbTree, 0);
    return sbTree.ToString();
}
/// <summary>
/// Appends one line describing this node to <paramref name="sb"/>, then recurses into the children.
/// </summary>
/// <param name="sb">Builder that receives the text.</param>
/// <param name="iLevel">Depth of this node; the line is prefixed with that many tab characters.</param>
private void ToString(StringBuilder sb, int iLevel)
{
    sb.Append('\t', iLevel);

    // "^" marks a condition that must match the whole word
    var sAnchor = bWholeWord ? "^" : string.Empty;
    sb.AppendFormat("Suffix=\"{0}{1}\"; ", sAnchor, sCondition);
    sb.AppendFormat("Rule=\"{0}\"; ", lrBestRule);
    sb.AppendFormat("Weight=\"{0}\"; ", dWeight);

    var bHasRules = aBestRules != null;
    if (bHasRules && aBestRules.Length > 0)
    {
        sb.AppendFormat("Cover={0}; ", aBestRules[0].Weight);
    }

    sb.Append("Rulles=");
    if (bHasRules)
    {
        for (var iRule = 0; iRule < aBestRules.Length; iRule++)
        {
            sb.AppendFormat(" {0}", aBestRules[iRule]);
        }
    }
    sb.AppendLine("; ");

    if (dictSubNodes == null)
        return;

    foreach (var kvpChild in dictSubNodes)
    {
        kvpChild.Value.ToString(sb, iLevel + 1);
    }
}
#endregion
#region Serialization Functions (Binary)
/// <summary>
/// Writes this node and, recursively, all of its children to <paramref name="binWrt"/>.
/// </summary>
/// <remarks>
/// Layout: children flag, [child count, (key, child)...], iSimilarity, sCondition, bWholeWord,
/// best rule signature, best rule count, (rule signature, weight)..., dWeight, iStart, iEnd.
/// Rules are stored by signature only. The order must stay in sync with Deserialize.
/// </remarks>
/// <param name="binWrt">Writer that receives the data.</param>
public void Serialize(BinaryWriter binWrt)
{
    var bHasChildren = dictSubNodes != null;
    binWrt.Write(bHasChildren);
    if (bHasChildren)
    {
        binWrt.Write(dictSubNodes.Count);
        foreach (var kvpChild in dictSubNodes)
        {
            binWrt.Write(kvpChild.Key);
            kvpChild.Value.Serialize(binWrt);
        }
    }

    binWrt.Write(iSimilarity);
    binWrt.Write(sCondition);
    binWrt.Write(bWholeWord);

    binWrt.Write(lrBestRule.Signature);
    binWrt.Write(aBestRules.Length);
    foreach (var rwBest in aBestRules)
    {
        binWrt.Write(rwBest.Rule.Signature);
        binWrt.Write(rwBest.Weight);
    }

    binWrt.Write(dWeight);
    binWrt.Write(iStart);
    binWrt.Write(iEnd);
}
/// <summary>
/// Restores this node and, recursively, its children from <paramref name="binRead"/>, reading the
/// fields in the order Serialize writes them.
/// </summary>
/// <param name="binRead">Reader positioned at the start of a serialized node.</param>
/// <param name="lsett">Settings stored in the node and passed on to its children.</param>
/// <param name="elExamples">Example list whose <c>Rules</c> collection resolves the stored rule signatures.</param>
/// <param name="ltnParentNode">Parent of this node.</param>
public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
{
    this.lsett = lsett;

    // children precede the node's own fields in the stream
    dictSubNodes = binRead.ReadBoolean() ? new Dictionary<char, LemmaTreeNode>() : null;
    if (dictSubNodes != null)
    {
        for (var iLeft = binRead.ReadInt32(); iLeft > 0; iLeft--)
        {
            var chKey = binRead.ReadChar();
            dictSubNodes.Add(chKey, new LemmaTreeNode(binRead, this.lsett, elExamples, this));
        }
    }

    this.ltnParentNode = ltnParentNode;
    iSimilarity = binRead.ReadInt32();
    sCondition = binRead.ReadString();
    bWholeWord = binRead.ReadBoolean();

    // rules were written as signatures; look them up in the rule list
    lrBestRule = elExamples.Rules[binRead.ReadString()];
    var iBestCount = binRead.ReadInt32();
    aBestRules = new RuleWeighted[iBestCount];
    for (var iRule = 0; iRule < iBestCount; iRule++)
    {
        var lrRule = elExamples.Rules[binRead.ReadString()];
        var dRuleWeight = binRead.ReadDouble();
        aBestRules[iRule] = new RuleWeighted(lrRule, dRuleWeight);
    }

    dWeight = binRead.ReadDouble();
    iStart = binRead.ReadInt32();
    iEnd = binRead.ReadInt32();
    this.elExamples = elExamples;
}
/// <summary>
/// Creates a node by reading it from <paramref name="binRead"/>; all work is done by Deserialize.
/// </summary>
public LemmaTreeNode(BinaryReader binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
    => Deserialize(binRead, lsett, elExamples, ltnParentNode);
#endregion
#region Serialization Functions (Latino)
#if LATINO
/// <summary>
/// Latino counterpart of Serialize: writes this node and its children to <paramref name="binWrt"/>
/// using the same field order.
/// </summary>
public void Save(Latino.BinarySerializer binWrt)
{
    var bHasChildren = dictSubNodes != null;
    binWrt.WriteBool(bHasChildren);
    if (bHasChildren)
    {
        binWrt.WriteInt(dictSubNodes.Count);
        foreach (var kvpChild in dictSubNodes)
        {
            binWrt.WriteChar(kvpChild.Key);
            kvpChild.Value.Save(binWrt);
        }
    }

    binWrt.WriteInt(iSimilarity);
    binWrt.WriteString(sCondition);
    binWrt.WriteBool(bWholeWord);

    // rules are stored by signature only
    binWrt.WriteString(lrBestRule.Signature);
    binWrt.WriteInt(aBestRules.Length);
    foreach (var rwBest in aBestRules)
    {
        binWrt.WriteString(rwBest.Rule.Signature);
        binWrt.WriteDouble(rwBest.Weight);
    }

    binWrt.WriteDouble(dWeight);
    binWrt.WriteInt(iStart);
    binWrt.WriteInt(iEnd);
}
/// <summary>
/// Latino counterpart of Deserialize: restores this node and its children from
/// <paramref name="binRead"/>, reading the fields in the order Save writes them.
/// </summary>
public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
{
    this.lsett = lsett;

    // children precede the node's own fields in the stream
    dictSubNodes = binRead.ReadBool() ? new Dictionary<char, LemmaTreeNode>() : null;
    if (dictSubNodes != null)
    {
        for (var iLeft = binRead.ReadInt(); iLeft > 0; iLeft--)
        {
            var chKey = binRead.ReadChar();
            dictSubNodes.Add(chKey, new LemmaTreeNode(binRead, this.lsett, elExamples, this));
        }
    }

    this.ltnParentNode = ltnParentNode;
    iSimilarity = binRead.ReadInt();
    sCondition = binRead.ReadString();
    bWholeWord = binRead.ReadBool();

    // rules were written as signatures; look them up in the rule list
    lrBestRule = elExamples.Rules[binRead.ReadString()];
    var iBestCount = binRead.ReadInt();
    aBestRules = new RuleWeighted[iBestCount];
    for (var iRule = 0; iRule < iBestCount; iRule++)
    {
        var lrRule = elExamples.Rules[binRead.ReadString()];
        var dRuleWeight = binRead.ReadDouble();
        aBestRules[iRule] = new RuleWeighted(lrRule, dRuleWeight);
    }

    dWeight = binRead.ReadDouble();
    iStart = binRead.ReadInt();
    iEnd = binRead.ReadInt();
    this.elExamples = elExamples;
}
/// <summary>
/// Creates a node by reading it from a Latino serializer; all work is done by Load.
/// </summary>
public LemmaTreeNode(Latino.BinarySerializer binRead, LemmatizerSettings lsett, ExampleList elExamples, LemmaTreeNode ltnParentNode)
    => Load(binRead, lsett, elExamples, ltnParentNode);
#endif
#endregion
#region Other (Temporarly)
//TODO - this is temp function, remove it
/// <summary>
/// Verifies that, throughout this subtree, every child's condition ends with its parent's condition.
/// </summary>
/// <remarks>
/// The suffix test is ordinal (character by character), matching the char comparison performed by
/// ConditionSatisfied, so the result does not depend on the current culture.
/// </remarks>
/// <returns>True when every parent/child pair in the subtree passes the check; false otherwise.</returns>
public bool CheckConsistency()
{
    if (dictSubNodes == null)
        return true;

    foreach (var ltnChild in dictSubNodes.Values)
    {
        // stop at the first violation; nothing after it can change the answer
        if (!ltnChild.CheckConsistency() ||
            !ltnChild.sCondition.EndsWith(sCondition, StringComparison.Ordinal))
        {
            return false;
        }
    }
    return true;
}
#endregion
}
}

@ -1,465 +0,0 @@
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Runtime.Serialization;
using System.IO.Compression;
using SevenZip;
namespace LemmaSharp
{
/// <summary>
/// Lemmatizer that owns a list of training examples and lazily builds one or two
/// <see cref="LemmaTreeNode"/> trees from them.
/// </summary>
/// <remarks>
/// When <c>lsett.bBuildFrontLemmatizer</c> is set, two trees are built: the "front" tree is applied
/// to the reversed word first and the regular tree to the re-reversed result.
/// </remarks>
[Serializable]
public class Lemmatizer : ITrainableLemmatizer
#if LATINO
    , Latino.ISerializable
#endif
{
    #region Private Variables
    protected LemmatizerSettings lsett;
    protected ExampleList elExamples;
    // Main tree; null until BuildModel has run (or after the examples changed).
    protected LemmaTreeNode ltnRootNode;
    // Tree applied to the reversed word; only built when lsett.bBuildFrontLemmatizer is set.
    protected LemmaTreeNode ltnRootNodeFront;
    #endregion

    #region Constructor(s)
    /// <summary>Creates an empty lemmatizer with default settings.</summary>
    public Lemmatizer() :
        this(new LemmatizerSettings())
    { }

    /// <summary>Creates an empty lemmatizer that uses <paramref name="lsett"/>.</summary>
    public Lemmatizer(LemmatizerSettings lsett)
    {
        this.lsett = lsett;
        this.elExamples = new ExampleList(lsett);
        this.ltnRootNode = null;
        this.ltnRootNodeFront = null;
    }

    /// <summary>Creates a lemmatizer and loads examples from <paramref name="srIn"/> via AddMultextFile.</summary>
    public Lemmatizer(StreamReader srIn, string sFormat, LemmatizerSettings lsett) : this(lsett)
    {
        AddMultextFile(srIn, sFormat);
    }
    #endregion

    #region Private Properties
    // Main tree, built on first access.
    private LemmaTreeNode ltrRootNodeSafe
    {
        get
        {
            if (ltnRootNode == null)
                BuildModel();
            return ltnRootNode;
        }
    }

    // Front tree, built on first access when the settings ask for one; otherwise whatever is stored.
    private LemmaTreeNode ltrRootNodeFrontSafe
    {
        get
        {
            if (ltnRootNodeFront == null && lsett.bBuildFrontLemmatizer)
                BuildModel();
            return ltnRootNodeFront;
        }
    }
    #endregion

    #region Public Properties
    /// <summary>A deep copy of the settings; changing it does not affect this lemmatizer.</summary>
    public LemmatizerSettings Settings
    {
        get { return lsett.CloneDeep(); }
    }

    /// <summary>The example list used for training.</summary>
    public ExampleList Examples
    {
        get { return elExamples; }
    }

    /// <summary>The rule list owned by the example list.</summary>
    public RuleList Rules
    {
        get { return elExamples.Rules; }
    }

    /// <summary>Root of the main tree; builds the model first when necessary.</summary>
    public LemmaTreeNode RootNode
    {
        get { return ltrRootNodeSafe; }
    }

    /// <summary>Root of the front tree; builds the model first when the settings require a front tree.</summary>
    public LemmaTreeNode RootNodeFront
    {
        get { return ltrRootNodeFrontSafe; }
    }

    /// <summary>The main tree exposed as a model; builds the model first when necessary.</summary>
    public ILemmatizerModel Model
    {
        get { return ltrRootNodeSafe; }
    }
    #endregion

    #region Essential Class Functions (adding examples to repository)
    /// <summary>Adds the examples read from <paramref name="srIn"/> and discards the built model.</summary>
    public void AddMultextFile(StreamReader srIn, string sFormat)
    {
        this.elExamples.AddMultextFile(srIn, sFormat);
        InvalidateModel();
    }

    /// <summary>Adds an example with weight 1 and no msd tag.</summary>
    public void AddExample(string sWord, string sLemma)
    {
        AddExample(sWord, sLemma, 1, null);
    }

    /// <summary>Adds an example with the given weight and no msd tag.</summary>
    public void AddExample(string sWord, string sLemma, double dWeight)
    {
        AddExample(sWord, sLemma, dWeight, null);
    }

    /// <summary>Adds an example and discards the built model so that it is rebuilt on next use.</summary>
    public void AddExample(string sWord, string sLemma, double dWeight, string sMsd)
    {
        elExamples.AddExample(sWord, sLemma, dWeight, sMsd);
        InvalidateModel();
    }

    /// <summary>Forwards to <c>ExampleList.DropExamples</c>; the built model is kept.</summary>
    public void DropExamples()
    {
        elExamples.DropExamples();
    }

    /// <summary>Forwards to <c>ExampleList.FinalizeAdditions</c>.</summary>
    public void FinalizeAdditions()
    {
        elExamples.FinalizeAdditions();
    }

    // Drops both trees. The front tree must be dropped together with the main one: the lazy
    // accessors only rebuild a tree that is null, and Lemmatize consults the front tree first,
    // so a surviving front tree would be used with outdated examples.
    private void InvalidateModel()
    {
        ltnRootNode = null;
        ltnRootNodeFront = null;
    }
    #endregion

    #region Essential Class Functions (building model & lemmatizing)
    /// <summary>
    /// Builds the tree(s) from the current examples; does nothing when the main tree already exists.
    /// </summary>
    public void BuildModel()
    {
        if (ltnRootNode != null)
            return;

        if (!lsett.bBuildFrontLemmatizer)
        {
            //TODO (from original author): consider removing this FinalizeAdditions call
            elExamples.FinalizeAdditions();
            ltnRootNode = new LemmaTreeNode(lsett, elExamples);
        }
        else
        {
            ltnRootNode = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(false));
            ltnRootNodeFront = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(true));
        }
    }

    /// <summary>
    /// Lemmatizes <paramref name="sWord"/>, building the model first when necessary.
    /// </summary>
    public string Lemmatize(string sWord)
    {
        if (!lsett.bBuildFrontLemmatizer)
        {
            return ltrRootNodeSafe.Lemmatize(sWord);
        }

        // front pass works on the reversed word; its result is reversed back for the main pass
        var sWordFront = LemmaExample.StringReverse(sWord);
        var sLemmaFront = ltrRootNodeFrontSafe.Lemmatize(sWordFront);
        var sWordRear = LemmaExample.StringReverse(sLemmaFront);
        return ltrRootNodeSafe.Lemmatize(sWordRear);
    }
    #endregion

    #region Serialization Functions (ISerializable)
    /// <summary>Stores the settings and the examples; the trees are rebuilt on load.</summary>
    public void GetObjectData(SerializationInfo info, StreamingContext context)
    {
        info.AddValue("lsett", lsett);
        info.AddValue("elExamples", elExamples);
    }

    /// <summary>Restores settings and examples written by GetObjectData and rebuilds the model.</summary>
    public Lemmatizer(SerializationInfo info, StreamingContext context) : this()
    {
        lsett = (LemmatizerSettings)info.GetValue("lsett", typeof(LemmatizerSettings));
        elExamples = (ExampleList)info.GetValue("elExamples", typeof(ExampleList));
        this.BuildModel();
    }
    #endregion

    #region Serialization Functions (Binary)
    /// <summary>
    /// Writes settings, examples and tree(s) to <paramref name="binWrt"/>.
    /// </summary>
    /// <param name="binWrt">Destination writer.</param>
    /// <param name="bSerializeExamples">
    /// Passed on to <c>ExampleList.Serialize</c>; when false, the front and rear example lists are
    /// written as well so that Deserialize can read them back.
    /// </param>
    public void Serialize(BinaryWriter binWrt, bool bSerializeExamples)
    {
        // The trees are dereferenced below; make sure they exist (no-op when already built).
        BuildModel();

        lsett.Serialize(binWrt);
        binWrt.Write(bSerializeExamples);
        elExamples.Serialize(binWrt, bSerializeExamples, false);
        if (!bSerializeExamples)
        {
            elExamples.GetFrontRearExampleList(false).Serialize(binWrt, bSerializeExamples, false);
            elExamples.GetFrontRearExampleList(true).Serialize(binWrt, bSerializeExamples, false);
        }
        ltnRootNode.Serialize(binWrt);
        if (lsett.bBuildFrontLemmatizer)
            ltnRootNodeFront.Serialize(binWrt);
    }

    /// <summary>
    /// Reads the data written by <see cref="Serialize(BinaryWriter, bool)"/>, replacing the current state.
    /// </summary>
    public void Deserialize(BinaryReader binRead)
    {
        lsett = new LemmatizerSettings(binRead);
        var bSerializeExamples = binRead.ReadBoolean();
        elExamples = new ExampleList(binRead, lsett);

        ExampleList elExamplesRear;
        ExampleList elExamplesFront;
        if (bSerializeExamples)
        {
            elExamplesRear = elExamples.GetFrontRearExampleList(false);
            elExamplesFront = elExamples.GetFrontRearExampleList(true);
        }
        else
        {
            elExamplesRear = new ExampleList(binRead, lsett);
            elExamplesFront = new ExampleList(binRead, lsett);
        }

        if (!lsett.bBuildFrontLemmatizer)
        {
            ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null);
            // do not keep a front tree that belonged to the previous state of this instance
            ltnRootNodeFront = null;
        }
        else
        {
            ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamplesRear, null);
            ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, elExamplesFront, null);
        }
    }

    //Do not change the order!!! (If new compression algorithms are added, append them at the end,
    //otherwise you will not be able to load old files: the numeric value is stored as the first byte.)
    public enum Compression
    {
        None,
        Deflate,
        LZMA
    }

    /// <summary>
    /// Loads a lemmatizer from a reader positioned at the compression marker byte.
    /// Only uncompressed data can be read this way.
    /// </summary>
    /// <exception cref="NotSupportedException">The marker byte announces compressed data.</exception>
    public Lemmatizer(BinaryReader binRead)
    {
        var compr = (Compression)binRead.ReadByte();
        if (compr != Compression.None)
            throw new NotSupportedException("Loading lemmatizer with binary reader on compressed stream is not supported.");
        Deserialize(binRead);
    }

    /// <summary>Loads a lemmatizer from <paramref name="streamIn"/>; see <see cref="Deserialize(Stream)"/>.</summary>
    public Lemmatizer(Stream streamIn)
    {
        Deserialize(streamIn);
    }

    /// <summary>Serializes with examples and without compression.</summary>
    public void Serialize(Stream streamOut)
    {
        Serialize(streamOut, true, Compression.None);
    }

    /// <summary>Serializes without compression.</summary>
    public void Serialize(Stream streamOut, bool bSerializeExamples)
    {
        Serialize(streamOut, bSerializeExamples, Compression.None);
    }

    /// <summary>
    /// Writes a one-byte compression marker followed by the (optionally compressed) lemmatizer data.
    /// </summary>
    /// <remarks>
    /// With <see cref="Compression.None"/> the <see cref="BinaryWriter"/> wrapped around
    /// <paramref name="streamOut"/> is disposed, which also closes <paramref name="streamOut"/>;
    /// the two compressed variants leave it open.
    /// </remarks>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="compress"/> is not a known value.</exception>
    public void Serialize(Stream streamOut, bool bSerializeExamples, Compression compress)
    {
        // Reject unknown values before the marker byte is written; otherwise the output would
        // consist of the marker alone and could never be loaded.
        if (compress != Compression.None && compress != Compression.Deflate && compress != Compression.LZMA)
            throw new ArgumentOutOfRangeException(nameof(compress), compress, "Unsupported compression type.");

        streamOut.WriteByte((byte)compress);
        switch (compress)
        {
            case Compression.None:
                SerializeNone(streamOut, bSerializeExamples);
                break;
            case Compression.Deflate:
                SerializeDeflate(streamOut, bSerializeExamples);
                break;
            case Compression.LZMA:
                SerializeLZMA(streamOut, bSerializeExamples);
                break;
        }
    }

    // Writes the raw binary form. Disposing the writer closes streamOut as well.
    private void SerializeNone(Stream streamOut, bool bSerializeExamples)
    {
        using (var binWrt = new BinaryWriter(streamOut))
        {
            this.Serialize(binWrt, bSerializeExamples);
        }
    }

    // Writes the binary form through a DeflateStream; streamOut itself is left open.
    private void SerializeDeflate(Stream streamOut, bool bSerializeExamples)
    {
        using (var streamOutNew = new DeflateStream(streamOut, CompressionMode.Compress, true))
        using (var binWrt = new BinaryWriter(streamOutNew))
        {
            // disposing the writer flushes and closes the deflate stream
            this.Serialize(binWrt, bSerializeExamples);
        }
    }

    // Serializes into memory, then writes: coder properties, 8-byte little-endian uncompressed
    // length, LZMA-encoded payload. streamOut is left open.
    private void SerializeLZMA(Stream streamOut, bool bSerializeExamples)
    {
        CoderPropID[] propIDs =
        {
            CoderPropID.DictionarySize,
            CoderPropID.PosStateBits,
            CoderPropID.LitContextBits,
            CoderPropID.LitPosBits,
            CoderPropID.Algorithm,
            CoderPropID.NumFastBytes,
            CoderPropID.MatchFinder,
            CoderPropID.EndMarker
        };
        Int32 dictionary = 1 << 23;
        Int32 posStateBits = 2;
        Int32 litContextBits = 3; // for normal files
        Int32 litPosBits = 0;
        Int32 algorithm = 2;
        Int32 numFastBytes = 128;
        var mf = "bt4";
        var eos = false;
        object[] properties =
        {
            dictionary,
            posStateBits,
            litContextBits,
            litPosBits,
            algorithm,
            numFastBytes,
            mf,
            eos
        };

        using (var msTemp = new MemoryStream())
        using (var binWrtTemp = new BinaryWriter(msTemp))
        {
            this.Serialize(binWrtTemp, bSerializeExamples);
            binWrtTemp.Flush();
            msTemp.Position = 0;

            var encoder = new SevenZip.Compression.LZMA.Encoder();
            encoder.SetCoderProperties(propIDs, properties);
            encoder.WriteCoderProperties(streamOut);

            var fileSize = msTemp.Length;
            for (var i = 0; i < 8; i++)
            {
                streamOut.WriteByte((Byte)(fileSize >> (8 * i)));
            }
            encoder.Code(msTemp, streamOut, -1, -1, null);
        }
    }

    /// <summary>
    /// Reads the compression marker byte from <paramref name="streamIn"/> and loads the lemmatizer
    /// from the (decompressed) remainder.
    /// </summary>
    /// <remarks>
    /// The stream returned by Decompress is disposed afterwards; for uncompressed data that stream
    /// is <paramref name="streamIn"/> itself.
    /// </remarks>
    public void Deserialize(Stream streamIn)
    {
        var compr = (Compression)streamIn.ReadByte();
        using (var streamInNew = Decompress(streamIn, compr))
        using (var br = new BinaryReader(streamInNew))
        {
            Deserialize(br);
        }
    }

    // Returns a stream that yields the uncompressed data; unknown markers are treated as "no compression".
    private Stream Decompress(Stream streamIn, Compression compress)
    {
        switch (compress)
        {
            case Compression.Deflate:
                return new DeflateStream(streamIn, CompressionMode.Decompress);
            case Compression.LZMA:
                return DecompressLZMA(streamIn);
            case Compression.None:
            default:
                return streamIn;
        }
    }

    // Decodes an LZMA payload (5 property bytes, 8-byte little-endian length, data) into memory.
    // Uses streamIn.Length/Position, so the input stream must be seekable.
    private Stream DecompressLZMA(Stream streamIn)
    {
        var properties = new byte[5];
        // Stream.Read may return fewer bytes than requested; keep reading until the header is complete.
        var iRead = 0;
        while (iRead < properties.Length)
        {
            var iChunk = streamIn.Read(properties, iRead, properties.Length - iRead);
            if (iChunk <= 0)
                throw new EndOfStreamException("input .lzma is too short");
            iRead += iChunk;
        }

        var decoder = new SevenZip.Compression.LZMA.Decoder();
        decoder.SetDecoderProperties(properties);

        long outSize = 0;
        for (var i = 0; i < 8; i++)
        {
            var v = streamIn.ReadByte();
            if (v < 0)
                throw new EndOfStreamException("Can't Read 1");
            outSize |= ((long)(byte)v) << (8 * i);
        }

        var compressedSize = streamIn.Length - streamIn.Position;
        var outStream = new MemoryStream();
        decoder.Code(streamIn, outStream, compressedSize, outSize, null);
        outStream.Position = 0;
        return outStream;
    }
    #endregion

    #region Serialization Functions (Latino)
#if LATINO
    /// <summary>Writes settings, examples and tree(s) to a Latino serializer.</summary>
    public void Save(Latino.BinarySerializer binWrt)
    {
        // The trees are dereferenced below; make sure they exist (no-op when already built).
        BuildModel();

        lsett.Save(binWrt);
        elExamples.Save(binWrt, true, false);
        ltnRootNode.Save(binWrt);
        if (lsett.bBuildFrontLemmatizer)
            ltnRootNodeFront.Save(binWrt);
    }

    /// <summary>Reads the data written by <see cref="Save(Latino.BinarySerializer)"/>.</summary>
    public void Load(Latino.BinarySerializer binRead)
    {
        lsett = new LemmatizerSettings(binRead);
        elExamples = new ExampleList(binRead, lsett);
        if (!lsett.bBuildFrontLemmatizer)
        {
            ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples, null);
        }
        else
        {
            ltnRootNode = new LemmaTreeNode(binRead, lsett, elExamples.GetFrontRearExampleList(false), null);
            ltnRootNodeFront = new LemmaTreeNode(binRead, lsett, elExamples.GetFrontRearExampleList(true), null);
        }
    }

    public Lemmatizer(Latino.BinarySerializer binRead)
    {
        Load(binRead);
    }

    public void Save(Stream streamOut)
    {
        Latino.BinarySerializer binWrt = new Latino.BinarySerializer(streamOut);
        this.Save(binWrt);
        binWrt.Close();
    }

    public void Load(Stream streamIn)
    {
        Latino.BinarySerializer binRead = new Latino.BinarySerializer(streamIn);
        Load(binRead);
        binRead.Close();
    }

    // sDummy only distinguishes this overload from Lemmatizer(Stream).
    public Lemmatizer(Stream streamIn, string sDummy)
    {
        Load(streamIn);
    }
#endif
    #endregion
}
}

@ -1,143 +0,0 @@
using System;
using System.IO;
using System.Runtime.Serialization;
namespace LemmaSharp
{
/// <summary>
/// Settings of the lemmagen algorithm that affect the speed/power of learning and lemmatizing.
/// TODO: this class will probably be removed in the future.
/// </summary>
[Serializable]
public class LemmatizerSettings : ISerializable
{
    #region Sub-Structures
    /// <summary>
    /// How the algorithm treats msd tags.
    /// </summary>
    public enum MsdConsideration
    {
        /// <summary>
        /// Completely ignores msd tags (joins examples with different tags and sums their weights).
        /// </summary>
        Ignore,
        /// <summary>
        /// Identical examples with different msd tags are not considered equal and are not joined.
        /// </summary>
        Distinct,
        /// <summary>
        /// Joins examples with different tags (concatenates all msd tags).
        /// </summary>
        JoinAll,
        /// <summary>
        /// Joins examples with different tags (concatenates only distinct msd tags - somewhat slower).
        /// </summary>
        JoinDistinct,
        /// <summary>
        /// Joins examples with different tags (the new tag is the left-to-right substring shared by all joined examples).
        /// </summary>
        JoinSameSubstring
    }
    #endregion

    #region Public Variables
    /// <summary>
    /// True if the "from" string is part of the rule identifier ([from]->[to]);
    /// false if only its length is used ([#len]->[to]).
    /// </summary>
    public bool bUseFromInRules = true;
    /// <summary>
    /// Specifies how the algorithm treats msd tags.
    /// </summary>
    public MsdConsideration eMsdConsider = MsdConsideration.Distinct;
    /// <summary>
    /// How many of the best rules are kept in memory for each node. Zero means unlimited.
    /// </summary>
    public int iMaxRulesPerNode = 0;
    /// <summary>
    /// If true, the build process uses a few more heuristics and first builds a left-to-right
    /// lemmatizer (one that lemmatizes the front of the word).
    /// </summary>
    public bool bBuildFrontLemmatizer = false;
    #endregion

    #region Constructor(s)
    /// <summary>Creates settings holding the default values declared on the fields.</summary>
    public LemmatizerSettings()
    {
    }
    #endregion

    #region Cloneable functions
    /// <summary>Returns a new instance carrying the same four setting values.</summary>
    public LemmatizerSettings CloneDeep()
    {
        var lsettCopy = new LemmatizerSettings();
        lsettCopy.bUseFromInRules = bUseFromInRules;
        lsettCopy.eMsdConsider = eMsdConsider;
        lsettCopy.iMaxRulesPerNode = iMaxRulesPerNode;
        lsettCopy.bBuildFrontLemmatizer = bBuildFrontLemmatizer;
        return lsettCopy;
    }
    #endregion

    #region Serialization Functions (ISerializable)
    /// <summary>Stores each setting under the name of its field.</summary>
    public void GetObjectData(SerializationInfo info, StreamingContext context)
    {
        info.AddValue("bUseFromInRules", bUseFromInRules);
        info.AddValue("eMsdConsider", eMsdConsider);
        info.AddValue("iMaxRulesPerNode", iMaxRulesPerNode);
        info.AddValue("bBuildFrontLemmatizer", bBuildFrontLemmatizer);
    }

    /// <summary>Restores the settings stored by GetObjectData.</summary>
    public LemmatizerSettings(SerializationInfo info, StreamingContext context)
    {
        bUseFromInRules = info.GetBoolean("bUseFromInRules");
        eMsdConsider = (MsdConsideration)info.GetValue("eMsdConsider", typeof(MsdConsideration));
        iMaxRulesPerNode = info.GetInt32("iMaxRulesPerNode");
        bBuildFrontLemmatizer = info.GetBoolean("bBuildFrontLemmatizer");
    }
    #endregion

    #region Serialization Functions (Binary)
    /// <summary>
    /// Writes the settings as: bool, int (enum value), int, bool.
    /// </summary>
    public void Serialize(BinaryWriter binWrt)
    {
        binWrt.Write(bUseFromInRules);
        var iMsdConsider = (int)eMsdConsider;
        binWrt.Write(iMsdConsider);
        binWrt.Write(iMaxRulesPerNode);
        binWrt.Write(bBuildFrontLemmatizer);
    }

    /// <summary>
    /// Reads the settings in the order Serialize writes them.
    /// </summary>
    public void Deserialize(BinaryReader binRead)
    {
        bUseFromInRules = binRead.ReadBoolean();
        var iMsdConsider = binRead.ReadInt32();
        eMsdConsider = (MsdConsideration)iMsdConsider;
        iMaxRulesPerNode = binRead.ReadInt32();
        bBuildFrontLemmatizer = binRead.ReadBoolean();
    }

    /// <summary>Creates settings by reading them from <paramref name="binRead"/>.</summary>
    public LemmatizerSettings(BinaryReader binRead)
    {
        Deserialize(binRead);
    }
    #endregion

    #region Serialization Functions (Latino)
#if LATINO
    /// <summary>Latino counterpart of Serialize (same field order).</summary>
    public void Save(Latino.BinarySerializer binWrt)
    {
        binWrt.WriteBool(bUseFromInRules);
        var iMsdConsider = (int)eMsdConsider;
        binWrt.WriteInt(iMsdConsider);
        binWrt.WriteInt(iMaxRulesPerNode);
        binWrt.WriteBool(bBuildFrontLemmatizer);
    }

    /// <summary>Latino counterpart of Deserialize (same field order).</summary>
    public void Load(Latino.BinarySerializer binRead)
    {
        bUseFromInRules = binRead.ReadBool();
        var iMsdConsider = binRead.ReadInt();
        eMsdConsider = (MsdConsideration)iMsdConsider;
        iMaxRulesPerNode = binRead.ReadInt();
        bBuildFrontLemmatizer = binRead.ReadBool();
    }

    public LemmatizerSettings(Latino.BinarySerializer reader)
    {
        Load(reader);
    }
#endif
    #endregion
}
}

@ -1,161 +0,0 @@
using System.Collections.Generic;
using System.IO;
namespace LemmaSharp
{
/// <summary>
/// Collection of lemmatization rules keyed by rule signature, with one rule designated as default.
/// </summary>
public class RuleList : Dictionary<string, LemmaRule>
{
    #region Private Variables
    private LemmatizerSettings lsett;
    private LemmaRule lrDefaultRule;
    #endregion

    #region Constructor(s)
    /// <summary>
    /// Creates a list that contains only the default rule, built from two empty strings and id 0.
    /// </summary>
    public RuleList(LemmatizerSettings lsett)
    {
        this.lsett = lsett;
        var lrEmpty = new LemmaRule("", "", 0, lsett);
        lrDefaultRule = AddRule(lrEmpty);
    }
    #endregion

    #region Public Properties
    /// <summary>The rule registered as default.</summary>
    public LemmaRule DefaultRule => lrDefaultRule;
    #endregion

    #region Essential Class Functions
    /// <summary>
    /// Returns the rule derived from the example's word and lemma, adding it to the list when no
    /// rule with the same signature exists yet. A new rule receives the current count as its id.
    /// </summary>
    public LemmaRule AddRule(LemmaExample le)
    {
        var lrCandidate = new LemmaRule(le.Word, le.Lemma, this.Count, lsett);
        return AddRule(lrCandidate);
    }

    // Returns the stored rule with the same signature, or stores and returns lrRuleNew.
    private LemmaRule AddRule(LemmaRule lrRuleNew)
    {
        LemmaRule lrKnown;
        if (this.TryGetValue(lrRuleNew.Signature, out lrKnown))
            return lrKnown;

        this.Add(lrRuleNew.Signature, lrRuleNew);
        return lrRuleNew;
    }
    #endregion

    #region Serialization Functions (Binary)
    /// <summary>
    /// Writes the list as: top-object flag, [settings when top object], rule count,
    /// (key, rule)..., signature of the default rule.
    /// </summary>
    public void Serialize(BinaryWriter binWrt, bool bThisTopObject)
    {
        // metadata
        binWrt.Write(bThisTopObject);

        // settings are only written when this list is the top-level object
        if (bThisTopObject)
        {
            lsett.Serialize(binWrt);
        }

        // list items
        binWrt.Write(this.Count);
        foreach (var kvpRule in this)
        {
            binWrt.Write(kvpRule.Key);
            kvpRule.Value.Serialize(binWrt, false);
        }

        // the default rule is already part of the list; only its signature is stored
        binWrt.Write(lrDefaultRule.Signature);
    }

    /// <summary>
    /// Replaces the content of this list with the data written by Serialize.
    /// </summary>
    /// <param name="binRead">Reader positioned at the start of a serialized list.</param>
    /// <param name="lsett">Settings to use when the stream does not carry its own.</param>
    public void Deserialize(BinaryReader binRead, LemmatizerSettings lsett)
    {
        // metadata
        var bThisTopObject = binRead.ReadBoolean();

        // a top-level list carries its own settings; otherwise the caller's are used
        this.lsett = bThisTopObject ? new LemmatizerSettings(binRead) : lsett;

        // list items
        this.Clear();
        for (var iLeft = binRead.ReadInt32(); iLeft > 0; iLeft--)
        {
            var sKey = binRead.ReadString();
            this.Add(sKey, new LemmaRule(binRead, this.lsett));
        }

        // only the signature of the default rule was stored; link it to the loaded instance
        lrDefaultRule = this[binRead.ReadString()];
    }

    /// <summary>Creates a list by reading it from <paramref name="binRead"/>.</summary>
    public RuleList(BinaryReader binRead, LemmatizerSettings lsett)
    {
        Deserialize(binRead, lsett);
    }
    #endregion

    #region Serialization Functions (Latino)
#if LATINO
    /// <summary>Latino counterpart of Serialize (same layout).</summary>
    public void Save(Latino.BinarySerializer binWrt, bool bThisTopObject)
    {
        // metadata
        binWrt.WriteBool(bThisTopObject);

        // settings are only written when this list is the top-level object
        if (bThisTopObject)
        {
            lsett.Save(binWrt);
        }

        // list items
        binWrt.WriteInt(this.Count);
        foreach (var kvpRule in this)
        {
            binWrt.WriteString(kvpRule.Key);
            kvpRule.Value.Save(binWrt, false);
        }

        // the default rule is already part of the list; only its signature is stored
        binWrt.WriteString(lrDefaultRule.Signature);
    }

    /// <summary>Latino counterpart of Deserialize (same layout).</summary>
    public void Load(Latino.BinarySerializer binRead, LemmatizerSettings lsett)
    {
        // metadata
        var bThisTopObject = binRead.ReadBool();

        // a top-level list carries its own settings; otherwise the caller's are used
        this.lsett = bThisTopObject ? new LemmatizerSettings(binRead) : lsett;

        // list items
        this.Clear();
        for (var iLeft = binRead.ReadInt(); iLeft > 0; iLeft--)
        {
            var sKey = binRead.ReadString();
            this.Add(sKey, new LemmaRule(binRead, this.lsett));
        }

        // only the signature of the default rule was stored; link it to the loaded instance
        lrDefaultRule = this[binRead.ReadString()];
    }

    public RuleList(Latino.BinarySerializer binRead, LemmatizerSettings lsett)
    {
        Load(binRead, lsett);
    }
#endif
    #endregion
}
}

@ -1,50 +0,0 @@
using System;
namespace LemmaSharp
{
/// <summary>
/// Pairs a lemmatization rule with a weight and defines the ordering used to rank rules.
/// </summary>
[Serializable]
class RuleWeighted : IComparable<RuleWeighted>
{
    #region Private Variables
    private readonly LemmaRule lrRule;
    private readonly double dWeight;
    #endregion

    #region Constructor(s)
    /// <summary>Creates a pair from a rule and its weight.</summary>
    public RuleWeighted(LemmaRule lrRule, double dWeight)
    {
        this.lrRule = lrRule;
        this.dWeight = dWeight;
    }
    #endregion

    #region Public Properties
    /// <summary>The rule.</summary>
    public LemmaRule Rule
    {
        get { return lrRule; }
    }

    /// <summary>The weight assigned to the rule.</summary>
    public double Weight
    {
        get { return dWeight; }
    }
    #endregion

    #region Essential Class Functions (comparing objects, eg.: for sorting)
    /// <summary>
    /// Orders by weight, heavier first; ties are broken by rule id, larger id first.
    /// </summary>
    /// <param name="rl">The instance to compare with; may be null.</param>
    /// <returns>
    /// A negative value when this instance sorts before <paramref name="rl"/>, a positive value when
    /// it sorts after it (or when <paramref name="rl"/> is null), zero when both rank equally.
    /// </returns>
    public int CompareTo(RuleWeighted rl)
    {
        // IComparable<T> contract: every instance is greater than null
        if (object.ReferenceEquals(rl, null))
            return 1;

        // double.CompareTo yields a total order even when a weight is NaN (which a plain
        // "<" / ">" pair does not), so sorting always sees consistent results.
        // The operands are swapped to obtain descending order.
        var iByWeight = rl.dWeight.CompareTo(this.dWeight);
        if (iByWeight != 0)
            return iByWeight;

        if (this.lrRule.Id < rl.lrRule.Id) return 1;
        if (this.lrRule.Id > rl.lrRule.Id) return -1;
        return 0;
    }
    #endregion

    #region Output & Serialization Functions
    /// <summary>Rule text followed by the weight formatted as a percentage in parentheses.</summary>
    public override string ToString()
    {
        return string.Format("{0}{1:(0.00%)}", lrRule, dWeight);
    }
    #endregion
}
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save

Powered by TurnKey Linux.