Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Src/Utilities/SfmToXml/Converter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1896,6 +1896,8 @@ string ConvertBytes(string marker, byte[] data, int start, int end, IEncConverte
result = result.Replace("<", "&lt;");
result = result.Replace(">", "&gt;");

// We need to normalize result to NFD because the internal list data is NFD. See LT-18927.
result = result.Normalize(NormalizationForm.FormD);
return result;
}

Expand Down
65 changes: 64 additions & 1 deletion Src/Utilities/SfmToXml/Sfm2XmlTests/ConverterTests.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
// Copyright (c) 2017 SIL International
// Copyright (c) 2017 SIL International
// This software is licensed under the LGPL, version 2.1 or later
// (http://www.gnu.org/licenses/lgpl-2.1.html)

using System.IO;
using NUnit.Framework;
using SIL.TestUtilities;
using Sfm2Xml;
using System.Text;
using System.Linq;
using System.Xml;

namespace Sfm2XmlTests
{
Expand Down Expand Up @@ -76,5 +79,65 @@ public void ConverterHandlesSubEntryExampleFollowedByEntry()
AssertThatXmlIn.File(outputFile).HasSpecifiedNumberOfMatchesForXpath("//Entry", 2);
AssertThatXmlIn.File(outputFile).HasSpecifiedNumberOfMatchesForXpath("//Entry/Subentry", 1);
}

/// <summary>
/// Verifies that lexeme text supplied in precomposed (NFC) form is written to the
/// output XML in decomposed (NFD) form. See LT-18927.
/// </summary>
[Test]
public void ConverterNormalizesTextToNfd()
{
	// NFC form: é (U+00E9)
	const string composed = "\u00E9";

	// SFM input containing NFC text. The markers must stay at the start of each line.
	string sfmString = $@"\lx {composed}
\ps n
\ge test";

	// Minimal mapping: 'lx' begins an Entry, 'ge' or 'ps' begins a Sense.
	const string mappingString = @"<sfmMapping version='6.1'>
<settings>
<meaning app='fw.sil.org'/>
</settings>
<languages>
<langDef id='English' xml:lang='en'/>
<langDef id='Vernacular' xml:lang='fr'/>
</languages>
<hierarchy>
<level name='Entry' partOf='records' beginFields='lx'/>
<level name='Sense' partOf='Entry' beginFields='ge ps'/>
</hierarchy>
<fieldDescriptions>
<field sfm='lx' name='Lexeme Form' type='string' lang='Vernacular'/>
<field sfm='ps' name='Category' type='string' lang='English'/>
<field sfm='ge' name='Gloss' type='string' lang='English'/>
</fieldDescriptions>
</sfmMapping>";

	// GetTempFileName creates real files on disk, so they must be removed afterwards.
	var sfmFile = Path.GetTempFileName();
	var mappingFile = Path.GetTempFileName();
	var outputFile = Path.GetTempFileName();
	try
	{
		File.WriteAllText(sfmFile, sfmString);
		File.WriteAllText(mappingFile, mappingString);

		var converter = new Converter(null);
		converter.Convert(sfmFile, mappingFile, outputFile);

		// Extract the lexeme text from output XML
		var doc = new XmlDocument();
		doc.Load(outputFile);

		var lexemeNode = doc.SelectSingleNode("//lx | //LexemeForm | //Lexeme");
		Assert.NotNull(lexemeNode, "Lexeme node was not found in output XML");

		string outputText = lexemeNode.InnerText;

		// An empty string is trivially NFD; make sure there is text to check.
		Assert.IsFalse(string.IsNullOrEmpty(outputText), "Lexeme node in output XML has no text");

		// Assert normalization; on failure, list the code units that were produced.
		string codeUnits = string.Join(" ", outputText.Select(c => $"U+{(int)c:X4}"));
		Assert.IsTrue(IsNfd(outputText),
			$"Expected NFD normalization, but got: {codeUnits}");
	}
	finally
	{
		// File.Delete does not throw when the file is already gone.
		File.Delete(sfmFile);
		File.Delete(mappingFile);
		File.Delete(outputFile);
	}
}

/// <summary>
/// Returns true when <paramref name="s"/> is already in Unicode normalization form D,
/// i.e. normalizing it to NFD would leave it unchanged.
/// </summary>
private static bool IsNfd(string s) => s.IsNormalized(NormalizationForm.FormD);
}
}
Loading