Fix word breaking: prevent splitting words with accented characters
Fixed line breaking that would split words like "équivaut" into "é" on one
line and "quivaut" on the next line, even though they're part of the same word.
Root cause analysis (from debug logging):
When text contains accented characters in decomposed form (e + combining
accent), the system processes them as separate atoms:
1. "é" is processed as an accent atom, composed, and added to currentLine
2. "quivaut " is processed as the next ordinary atom
3. Before adding "quivaut ", checkAndPerformInteratomLineBreak() is called
4. This function sees that adding "quivaut " would exceed maxWidth
5. It breaks and flushes the line with "é" at the end
6. "quivaut " starts on a new line
Result: "é" appears alone at the end of one line, "quivaut " on the next.
The fix:
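Modified checkAndPerformInteratomLineBreak() in MTTypesetter.swift to detect
when we're about to break in the middle of a word: if the current line ends
with a letter and the next ordinary atom starts with a letter, no break is
performed. The line-break check that ran right after appending an accented
character was also removed, and the offset returned by
CTTypesetterSuggestLineBreak is now converted from UTF-16 code units to a
String.Index before it is used.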
This commit is contained in:
@@ -542,6 +542,28 @@ class MTTypesetter {
|
||||
// Don't break if current line is empty
|
||||
guard currentLine.length > 0 else { return false }
|
||||
|
||||
// CRITICAL: Don't break in the middle of words
|
||||
// When "équivaut" is decomposed as "é" (accent) + "quivaut" (ordinary),
|
||||
// we must not break between them even if the line exceeds maxWidth.
|
||||
// Check if currentLine ends with a letter and next atom starts with a letter
|
||||
// This prevents breaking mid-word (like "é|quivaut")
|
||||
if atom.type == .ordinary && !atom.nucleus.isEmpty {
|
||||
let lineText = currentLine.string
|
||||
if !lineText.isEmpty {
|
||||
let lastChar = lineText.last!
|
||||
let firstChar = atom.nucleus.first!
|
||||
|
||||
// If line ends with a letter (no trailing space/punctuation) and next atom
|
||||
// starts with a letter, they're part of the same word - don't break!
|
||||
// Example: "...é" + "quivaut" should not break
|
||||
// But "...km " + "équivaut" can break (has space)
|
||||
if lastChar.isLetter && firstChar.isLetter {
|
||||
// Don't break - this would split a word
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate what the width would be if we add this atom
|
||||
let currentLineWidth = getCurrentLineWidth()
|
||||
let atomWidth = calculateAtomWidth(atom, prevNode: prevNode)
|
||||
@@ -1085,8 +1107,9 @@ class MTTypesetter {
|
||||
let current = NSAttributedString(string:normalizedString)
|
||||
currentLine.append(current)
|
||||
|
||||
// Check if we should break the line
|
||||
self.checkAndBreakLine()
|
||||
// Don't check for line breaks here - accented characters are part of words
|
||||
// and breaking after each one would split words like "équivaut" into "é" + "quivaut"
|
||||
// Line breaking is handled in the regular .ordinary case below
|
||||
|
||||
// Add to atom list
|
||||
if currentLineIndexRange.location == NSNotFound {
|
||||
@@ -1337,11 +1360,20 @@ class MTTypesetter {
|
||||
let typesetter = CTTypesetterCreateWithAttributedString(attrString as CFAttributedString)
|
||||
let suggestedBreak = CTTypesetterSuggestLineBreak(typesetter, 0, Double(maxWidth))
|
||||
|
||||
guard suggestedBreak > 0 && suggestedBreak < text.count else {
|
||||
guard suggestedBreak > 0 else {
|
||||
return nil
|
||||
}
|
||||
|
||||
let breakIndex = text.index(text.startIndex, offsetBy: suggestedBreak)
|
||||
// IMPORTANT: CTTypesetterSuggestLineBreak returns a UTF-16 code unit offset,
|
||||
// but String.index(_:offsetBy:) counts extended grapheme clusters (Characters).
|
||||
// We must convert from UTF-16 space to String.Index properly to avoid
|
||||
// breaking in the middle of Unicode characters (like "é" in "équivaut").
|
||||
|
||||
// Convert UTF-16 offset to String.Index
|
||||
guard let utf16Index = text.utf16.index(text.utf16.startIndex, offsetBy: suggestedBreak, limitedBy: text.utf16.endIndex),
|
||||
let breakIndex = String.Index(utf16Index, within: text) else {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Conservative check: verify we're not breaking within a number
|
||||
if isBreakingSafeForNumbers(text: text, breakIndex: breakIndex) {
|
||||
|
||||
@@ -192,6 +192,39 @@ class MTMathUILabelLineWrappingTests: XCTestCase {
|
||||
XCTAssertNil(label.error, "Should have no rendering error")
|
||||
}
|
||||
|
||||
/// Regression test for the reported "équivaut" wrapping issue: the word must not
/// be split after its leading "é" when the label is constrained to a width of 235.
///
/// The exact line-break positions are not inspected here. The test only checks
/// that layout completes without an error, that the content wrapped (height
/// above 20), and that the measured width falls between 100 and 250.
func testUnicodeWordBreaking_EquivautCase() {
    // Configure a text-mode label with the sentence from the bug report.
    let mathLabel = MTMathUILabel()
    mathLabel.latex = "\\(\\text{Rappelons la conversion : 1 km équivaut à 1000 m.}\\)"
    mathLabel.font = MTFontManager.fontManager.defaultFont
    mathLabel.labelMode = .text

    // Apply the width constraint from the bug report, then measure.
    mathLabel.preferredMaxLayoutWidth = 235
    let fittedSize = mathLabel.intrinsicContentSize

    // Give the label its measured size and force a layout pass.
    mathLabel.frame = CGRect(origin: CGPoint.zero, size: fittedSize)
    #if os(macOS)
    mathLabel.layout()
    #else
    mathLabel.layoutSubviews()
    #endif

    // Rendering must have produced a display list and no error.
    XCTAssertNotNil(mathLabel.displayList, "Display list should be created")
    XCTAssertNil(mathLabel.error, "Should have no rendering error")

    // A height above 20 is taken as evidence of more than one line.
    let measuredHeight = fittedSize.height
    XCTAssertGreaterThan(measuredHeight, 20, "Should have wrapped to multiple lines")

    // Indirect check on the word "équivaut": layout succeeded and the
    // overall width stays within a plausible range.
    let measuredWidth = fittedSize.width
    XCTAssertGreaterThan(measuredWidth, 100, "Width should be reasonable")
    XCTAssertLessThan(measuredWidth, 250, "Width should respect constraint")
}
|
||||
|
||||
func testNumberProtection_FrenchDecimal() {
|
||||
let label = MTMathUILabel()
|
||||
// French decimal number should NOT be broken
|
||||
|
||||
Reference in New Issue
Block a user