diff --git a/Src/Utilities/SfmToXml/Converter.cs b/Src/Utilities/SfmToXml/Converter.cs
index 61b3dacedf..84c20c0581 100644
--- a/Src/Utilities/SfmToXml/Converter.cs
+++ b/Src/Utilities/SfmToXml/Converter.cs
@@ -1896,6 +1896,8 @@ string ConvertBytes(string marker, byte[] data, int start, int end, IEncConverte
 			result = result.Replace("<", "&lt;");
 			result = result.Replace(">", "&gt;");
 
+			// We need to normalize result to NFD because the internal list data is NFD. See LT-18927.
+			result = result.Normalize(NormalizationForm.FormD);
 			return result;
 		}
 
diff --git a/Src/Utilities/SfmToXml/Sfm2XmlTests/ConverterTests.cs b/Src/Utilities/SfmToXml/Sfm2XmlTests/ConverterTests.cs
index 7823e5d492..01d66e504f 100644
--- a/Src/Utilities/SfmToXml/Sfm2XmlTests/ConverterTests.cs
+++ b/Src/Utilities/SfmToXml/Sfm2XmlTests/ConverterTests.cs
@@ -1,4 +1,4 @@
-// Copyright (c) 2017 SIL International
+// Copyright (c) 2017 SIL International
 // This software is licensed under the LGPL, version 2.1 or later
 // (http://www.gnu.org/licenses/lgpl-2.1.html)
 
@@ -6,6 +6,9 @@
 using NUnit.Framework;
 using SIL.TestUtilities;
 using Sfm2Xml;
+using System.Text;
+using System.Linq;
+using System.Xml;
 
 namespace Sfm2XmlTests
 {
@@ -76,5 +79,87 @@ public void ConverterHandlesSubEntryExampleFollowedByEntry()
 			AssertThatXmlIn.File(outputFile).HasSpecifiedNumberOfMatchesForXpath("//Entry", 2);
 			AssertThatXmlIn.File(outputFile).HasSpecifiedNumberOfMatchesForXpath("//Entry/Subentry", 1);
 		}
+
+		/// <summary>
+		/// Verifies that composed (NFC) text in the SFM input comes out decomposed (NFD)
+		/// in the converted XML. See LT-18927.
+		/// </summary>
+		[Test]
+		public void ConverterNormalizesTextToNfd()
+		{
+			// NFC form: é (U+00E9); its NFD form is e (U+0065) + combining acute (U+0301)
+			const string composed = "\u00E9";
+			const string decomposed = "e\u0301";
+
+			// SFM input containing NFC text
+			string sfmString = $@"\lx {composed}
+\ps n
+\ge test";
+
+			// Reuse the same mapping as other tests
+			const string mappingString = @"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+";
+
+			var sfmFile = Path.GetTempFileName();
+			var mappingFile = Path.GetTempFileName();
+			var outputFile = Path.GetTempFileName();
+
+			try
+			{
+				File.WriteAllText(sfmFile, sfmString);
+				File.WriteAllText(mappingFile, mappingString);
+
+				var converter = new Converter(null);
+				converter.Convert(sfmFile, mappingFile, outputFile);
+
+				// Extract the lexeme text from output XML
+				var doc = new XmlDocument();
+				doc.Load(outputFile);
+
+				var lexemeNode = doc.SelectSingleNode("//lx | //LexemeForm | //Lexeme");
+				Assert.NotNull(lexemeNode, "Lexeme node was not found in output XML");
+
+				string outputText = lexemeNode.InnerText;
+				string codePoints = string.Join(" ", outputText.Select(c => $"U+{(int)c:X4}"));
+
+				// IsNfd alone is vacuously true for empty or plain-ASCII text, so also
+				// require that the decomposed sequence is actually present.
+				Assert.IsTrue(outputText.Contains(decomposed),
+					$"Expected decomposed e + U+0301 in output, but got: {codePoints}");
+				Assert.IsTrue(IsNfd(outputText),
+					$"Expected NFD normalization, but got: {codePoints}");
+			}
+			finally
+			{
+				// Path.GetTempFileName creates the files on disk; remove them even when an assertion fails
+				File.Delete(sfmFile);
+				File.Delete(mappingFile);
+				File.Delete(outputFile);
+			}
+		}
+
+		/// <summary>
+		/// Returns true when <paramref name="s"/> is unchanged by NFD normalization.
+		/// </summary>
+		private static bool IsNfd(string s)
+		{
+			return s == s.Normalize(NormalizationForm.FormD);
+		}
 	}
 }