From 926902a5b3db416dddf4769570cd5b5b83509b78 Mon Sep 17 00:00:00 2001 From: Bill Wagner Date: Mon, 8 Dec 2025 15:37:59 -0500 Subject: [PATCH 01/11] Add new diagnostics Add the new diagnostics that weren't already included in an appropriate diagnostic issue. --- .../compiler-messages/string-literal.md | 21 +++++++++++++++++++ docs/csharp/language-reference/toc.yml | 6 +++--- ...n-t-have-specifics-on-this-csharp-error.md | 2 -- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/docs/csharp/language-reference/compiler-messages/string-literal.md b/docs/csharp/language-reference/compiler-messages/string-literal.md index 396966581ceb0..d5619ac51f11c 100644 --- a/docs/csharp/language-reference/compiler-messages/string-literal.md +++ b/docs/csharp/language-reference/compiler-messages/string-literal.md @@ -6,6 +6,7 @@ f1_keywords: - "CS1011" - "CS1012" - "CS1039" + - "CS8996" - "CS8997" - "CS8998" - "CS8999" @@ -20,6 +21,8 @@ f1_keywords: - "CS9008" - "CS9009" - "CS1010" + - "CS9026" + - "CS9047" - "CS9274" - "CS9315" helpviewer_keywords: @@ -27,6 +30,7 @@ helpviewer_keywords: - "CS1011" - "CS1012" - "CS1039" + - "CS8996" - "CS8997" - "CS8998" - "CS8999" @@ -41,6 +45,8 @@ helpviewer_keywords: - "CS9008" - "CS9009" - "CS1010" + - "CS9026" + - "CS9047" - "CS9274" - "CS9315" ms.date: 10/09/2025 @@ -58,6 +64,7 @@ That's by design. 
The text closely matches the text of the compiler error / warn - [**CS1011**](#incorrectly-formed-string-literals): *Empty character literal.* - [**CS1012**](#incorrectly-formed-string-literals): *Too many characters in character literal.* - [**CS1039**](#incorrectly-formed-string-literals): *Unterminated string literal.* +- [**CS8996**](#incorrectly-formed-raw-string-literals): *Raw string literals are not allowed in preprocessor directives.* - [**CS8997**](#incorrectly-formed-raw-string-literals): *Unterminated raw string literal.* - [**CS8998**](#incorrectly-formed-raw-string-literals): *Not enough starting quotes for this raw string content.* - [**CS8999**](#incorrectly-formed-raw-string-literals): *Line does not start with the same whitespace as the closing line of the raw string literal.* @@ -71,6 +78,8 @@ That's by design. The text closely matches the text of the compiler error / warn - [**CS9007**](#incorrectly-formed-raw-string-literals): *Too many closing braces for interpolated raw string literal.* - [**CS9008**](#incorrectly-formed-raw-string-literals): *Sequence of '@' characters is not allowed.* - [**CS9009**](#incorrectly-formed-raw-string-literals): *String must start with quote character.* +- [**CS9026**](#utf-8-string-literals): *The input string cannot be converted into the equivalent UTF-8 byte representation.* +- [**CS9047**](#utf-8-string-literals): *Operator cannot be applied to operands that are not UTF-8 byte representations.* - [**CS9274**](#literal-strings-in-data-sections): *Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal.* - [**CS9315**](#literal-strings-in-data-sections): *Combined length of user strings used by the program exceeds allowed limit. 
Adding a string literal requires restarting the application.* @@ -119,6 +128,7 @@ For more information on literal strings and escape sequences, see the articles o The following errors are related to raw string literal syntax and usage. +- **CS8996** - *Raw string literals are not allowed in preprocessor directives.* - **CS8997** - *Unterminated raw string literal.* - **CS8998** - *Not enough starting quotes for this raw string content.* - **CS8999** - *Line does not start with the same whitespace as the closing line of the raw string literal.* @@ -154,6 +164,17 @@ var t = """First line For full syntax and more examples, see the [language reference on raw string literals](../tokens/raw-string.md). +## UTF-8 string literals + +- **CS9026** - *The input string cannot be converted into the equivalent UTF-8 byte representation.* +- **CS9047** - *Operator cannot be applied to operands that are not UTF-8 byte representations.* + +**CS9026** occurs when a string literal with the `u8` suffix contains characters or escape sequences that cannot be represented in UTF-8. The most common cause is attempting to use surrogate code points directly in a UTF-8 string literal. UTF-8 string literals must contain valid Unicode text that can be encoded as UTF-8 bytes. + +**CS9047** occurs when you attempt to use the addition operator (`+`) to concatenate UTF-8 string literals with non-UTF-8 operands. The addition operator for UTF-8 strings only works when both operands are UTF-8 byte representations (`ReadOnlySpan`). + +For more information on UTF-8 string literals, see the [language reference on UTF-8 string literals](../builtin-types/reference-types.md#utf-8-string-literals). 
+ ## Literal strings in data sections - **CS9274**: *Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal.* diff --git a/docs/csharp/language-reference/toc.yml b/docs/csharp/language-reference/toc.yml index c6e7591069a41..4d3e0729aa6fb 100644 --- a/docs/csharp/language-reference/toc.yml +++ b/docs/csharp/language-reference/toc.yml @@ -596,9 +596,9 @@ items: - name: String literal declarations href: ./compiler-messages/string-literal.md displayName: > - CS1009, CS1010, CS1011, CS1012, CS1039, CS8997, CS8998, CS8999, CS9000, CS9001, - CS9002, CS9003, CS9004, CS9005, CS9006, - CS9007, CS9008, CS9009, CS9274, CS9315 + CS1009, CS1010, CS1011, CS1012, CS1039, CS8996, CS8997, CS8998, CS8999, CS9000, + CS9001, CS9002, CS9003, CS9004, CS9005, CS9006, + CS9007, CS9008, CS9009, CS9026, CS9047, CS9274, CS9315 - name: Array declarations href: ./compiler-messages/array-declaration-errors.md displayName: > diff --git a/docs/csharp/misc/sorry-we-don-t-have-specifics-on-this-csharp-error.md b/docs/csharp/misc/sorry-we-don-t-have-specifics-on-this-csharp-error.md index 1d541b82007d3..cc0ec088edba0 100644 --- a/docs/csharp/misc/sorry-we-don-t-have-specifics-on-this-csharp-error.md +++ b/docs/csharp/misc/sorry-we-don-t-have-specifics-on-this-csharp-error.md @@ -468,7 +468,6 @@ f1_keywords: - "CS9020" - "CS9021" - "CS9022" - - "CS9026" - "CS9027" - "CS9029" - "CS9030" @@ -486,7 +485,6 @@ f1_keywords: - "CS9044" - "CS9045" - "CS9046" - - "CS9047" - "CS9049" - "CS9051" - "CS9052" From 6fa5abdffcbb071940106a6494e0c7c163f50eaa Mon Sep 17 00:00:00 2001 From: Bill Wagner Date: Mon, 8 Dec 2025 15:50:36 -0500 Subject: [PATCH 02/11] Update affected files to focus on fixes --- .github/prompts/error-consolidation.md | 2 +- .../compiler-messages/string-literal.md | 76 ++++++------------- 2 files changed, 25 insertions(+), 53 deletions(-) diff --git a/.github/prompts/error-consolidation.md b/.github/prompts/error-consolidation.md index 
6fc5e79dfadd6..65ec8247348d2 100644 --- a/.github/prompts/error-consolidation.md +++ b/.github/prompts/error-consolidation.md @@ -64,7 +64,7 @@ Understand these instructions, then suggest a list of themes and the included er ## Move from description to resolution -Rework the highlighted section so the focus is on how to correct each error. This article doesn't need to explain the associated language feature. Instead, in each section, provide links to language reference or language specification material that explains the rules violated when these diagnostics appear. Add explanatory context after each correction (in parentheses with the error code). Provided brief reasons why each correction is needed. Use detailed, sentence-style explanations rather than brief imperative statements. For each recommendation put the affectived error codes in parentheses, and in **bold** style. Remove extensive examples. Remove all H3 headings in this section. If any errors are no longer produced in the latest version of C#, make a note of that. +Rework the highlighted section so the focus is on how to correct each error. This article doesn't need to explain the associated language feature. Instead, in each section, provide links to language reference or language specification material that explains the rules violated when these diagnostics appear. Add explanatory context after each correction (in parentheses with the error code). Provide brief reasons why each correction is needed. Use detailed, sentence-style explanations rather than brief imperative statements. For each recommendation put the affected error codes in parentheses, and in **bold** style. Remove extensive examples. Remove all H3 headings in this section. If any errors are no longer produced in the latest version of C#, make a note of that. 
## Verify error messages diff --git a/docs/csharp/language-reference/compiler-messages/string-literal.md b/docs/csharp/language-reference/compiler-messages/string-literal.md index d5619ac51f11c..3eec80a20854d 100644 --- a/docs/csharp/language-reference/compiler-messages/string-literal.md +++ b/docs/csharp/language-reference/compiler-messages/string-literal.md @@ -87,47 +87,23 @@ The following sections provide examples of common issues and how to fix them. ## Incorrectly formed string literals -The following errors concern string and character literal syntax and common mistakes when declaring literal values. - - **CS1009** - *Unrecognized escape sequence.* - **CS1010** - *Newline in constant.* - **CS1011** - *Empty character literal.* - **CS1012** - *Too many characters in character literal.* - **CS1039** - *Unterminated string literal.* -Common causes and fixes: - -- Invalid escape sequences: An unexpected character follows a backslash (`\\`). Use valid escapes (`\\n`, `\\t`, `\\uXXXX`, `\\xX`) or use verbatim (`@"..."`) or raw string literals for content that includes backslashes. -- Empty or multi-character char literals: Character literals must contain exactly one UTF-16 code unit. Use a single character like `'x'` or a string / `System.Text.Rune` for characters outside the BMP. -- Unterminated strings: Ensure every string or verbatim string has a matching closing quote. For verbatim strings, the final `"` must be present; for normal strings ensure escaped quotes are balanced. -- A string literal spans multiple lines of C# source. 
- -Examples - -```csharp -// CS1009 - invalid escape -string a = "\\m"; // CS1009 - invalid escape \m - -// Use verbatim strings or escape backslashes -string filename = "c:\\myFolder\\myFile.txt"; // escaped backslashes -string filenameVerbatim = @"c:\myFolder\myFile.txt"; // verbatim string - -// CS1011 - empty character literal -// public char CharField = ''; // CS1011 - invalid: empty character literal +To address these errors, try the following: -// CS1012 - too many characters in char literal -char a = 'xx'; // CS1012 - too many characters +- Use one of the standard escape sequences defined in the C# language specification, such as `\n` (newline), `\t` (tab), `\\` (backslash), or `\"` (double quote) (**CS1009**). When you need to include backslashes or special characters in strings without escaping them, use verbatim string literals (prefixed with `@`) or raw string literals (delimited with triple quotes `"""`), which treat most characters as literal text. +- Ensure your character literal contains exactly one character between single quotes (`'x'`) (**CS1011**, **CS1012**). Character literals in C# must represent a single UTF-16 code unit. For characters outside the Basic Multilingual Plane or for multiple characters, use string literals instead, or use `System.Text.Rune` for proper Unicode scalar value handling. +- Add the closing quotation mark for your string literal or verbatim string literal (**CS1039**). Regular string literals cannot span multiple lines unless you use escape sequences like `\n` or switch to verbatim strings (with `@`) or raw string literals that support multi-line content. +- Split string literals that span multiple source lines by ending each line with a closing quote and starting the next line with an opening quote, using the `+` operator to concatenate them (**CS1010**). Alternatively, use verbatim string literals or raw string literals, which allow newlines as part of the string content. 
-// CS1039 - unterminated verbatim string -// string b = @"hello, world; // CS1039 - missing closing quote -``` - -For more information on literal strings and escape sequences, see the articles on [verbatim strings](../tokens/verbatim.md) and [raw strings](../tokens/raw-string.md). +For more information on string literal syntax and escape sequences, see [strings](../builtin-types/reference-types.md#string-literals), [verbatim strings](../tokens/verbatim.md), and [raw string literals](../tokens/raw-string.md). ## Incorrectly formed raw string literals -The following errors are related to raw string literal syntax and usage. - - **CS8996** - *Raw string literals are not allowed in preprocessor directives.* - **CS8997** - *Unterminated raw string literal.* - **CS8998** - *Not enough starting quotes for this raw string content.* @@ -143,41 +119,37 @@ The following errors are related to raw string literal syntax and usage. - **CS9008** - *Sequence of '@' characters is not allowed.* - **CS9009** - *String must start with quote character.* -Check these common causes and fixes: - -- Unterminated or mismatched delimiters: Ensure your raw string starts and ends with the same number of consecutive double quotes (`"`). For multi-line raw strings, the opening and closing delimiter lines must appear on their own lines. -- Indentation and whitespace mismatch: The indentation of the closing delimiter defines the trimming of common leading whitespace for content lines. Make sure content lines align with that indentation. -- Insufficient quote or `$` counts for content: If the content begins with runs of quote characters or brace characters, increase the length of the delimiter (more `"`) or the number of leading `$` characters for interpolated raw strings so content can't be confused with delimiters or interpolation. 
-- Illegal characters or sequences: Avoid multiple `@` characters for verbatim/raw combinations and ensure you use verbatim interpolated forms when combining interpolation with multi-line raw strings. +To address these issues, try the following techniques: -The following code shows a few examples of incorrectly formed raw string literals. +- Use regular string literals or verbatim string literals instead of raw string literals in preprocessor directives like `#if`, `#define`, or `#pragma` (**CS8996**). Preprocessor directives are evaluated before the lexical analysis that recognizes raw string literals, so raw string syntax isn't supported in these contexts. +- Complete your raw string literal by adding a closing delimiter that matches the opening delimiter (**CS8997**, **CS9004**). Raw string literals must start and end with the same number of consecutive double-quote characters (at least three: `"""`), which ensures the compiler can correctly identify where the string content ends. +- Place the opening and closing delimiters of multi-line raw string literals on their own lines, with no other content on those lines (**CS9000**). This requirement ensures consistent formatting and makes the boundaries of the raw string content clear, particularly when the string spans many lines. +- Add at least one line of content between the opening and closing delimiters of your multi-line raw string literal (**CS9002**). Multi-line raw strings are designed to contain text that spans multiple lines, so the delimiters must enclose actual content rather than appearing on consecutive lines. +- Adjust the indentation of your raw string content lines to match the indentation of the closing delimiter line (**CS8999**, **CS9003**). The compiler uses the closing delimiter's leading whitespace to determine how much whitespace to trim from each content line, so inconsistent indentation prevents proper whitespace removal and causes these errors. 
+- Increase the number of double-quote characters in your raw string delimiter to be greater than any consecutive run of quotes in the content (**CS8998**). This ensures the compiler can distinguish between quote characters that are part of the content and the closing delimiter sequence. +- For interpolated raw string literals, ensure the number of dollar signs (`$`) at the start matches the number of consecutive opening or closing braces you need in the content (**CS9005**, **CS9006**, **CS9007**). For example, use `$$"""` to allow single braces as content while still supporting interpolations with `{{` and `}}`. +- Use verbatim interpolated string format (`$@"..."`) when combining interpolation with multi-line strings (**CS9001**). Raw string literals support interpolation through the `$` prefix, but multi-line raw strings with interpolation require the verbatim format to correctly handle both features together. +- Start your raw string literal with quote characters only, without any `@` prefix (**CS9008**, **CS9009**). Raw string literals are a distinct syntax that doesn't use the `@` verbatim prefix, and attempting to combine `@` with raw string delimiters isn't valid syntax. -```csharp -// Unterminated raw string (CS8997) -var s = """This raw string never ends... - -// Delimiter must be on its own line (CS9000) -var t = """First line - More text - """; -``` - -For full syntax and more examples, see the [language reference on raw string literals](../tokens/raw-string.md). +For complete syntax rules and examples, see the [language reference on raw string literals](../tokens/raw-string.md). ## UTF-8 string literals - **CS9026** - *The input string cannot be converted into the equivalent UTF-8 byte representation.* - **CS9047** - *Operator cannot be applied to operands that are not UTF-8 byte representations.* -**CS9026** occurs when a string literal with the `u8` suffix contains characters or escape sequences that cannot be represented in UTF-8. 
The most common cause is attempting to use surrogate code points directly in a UTF-8 string literal. UTF-8 string literals must contain valid Unicode text that can be encoded as UTF-8 bytes. +Remove characters or escape sequences that can't be encoded in UTF-8 from your `u8` string literal (**CS9026**). UTF-8 encoding supports the full Unicode character set but requires valid Unicode scalar values, so surrogate code points (values in the range U+D800 through U+DFFF) can't appear directly in UTF-8 strings because they're reserved for UTF-16 encoding pairs rather than standalone characters. -**CS9047** occurs when you attempt to use the addition operator (`+`) to concatenate UTF-8 string literals with non-UTF-8 operands. The addition operator for UTF-8 strings only works when both operands are UTF-8 byte representations (`ReadOnlySpan`). +Ensure both operands of the addition operator are UTF-8 string literals when concatenating UTF-8 strings (**CS9047**). The compiler provides special support for concatenating UTF-8 string literals (which produce `ReadOnlySpan<byte>` values), but mixing UTF-8 strings with regular strings or other types isn't supported because the resulting type would be ambiguous and the byte representations are incompatible. -For more information on UTF-8 string literals, see the [language reference on UTF-8 string literals](../builtin-types/reference-types.md#utf-8-string-literals). +For more information on UTF-8 string literals and their type conversions, see the [language reference on UTF-8 string literals](../builtin-types/reference-types.md#utf-8-string-literals). ## Literal strings in data sections - **CS9274**: *Cannot emit this string literal into the data section because it has XXHash128 collision with another string literal.* - **CS9315**: *Combined length of user strings used by the program exceeds allowed limit. 
Adding a string literal requires restarting the application.* -**CS9274** indicate that your declaration can't be emitted in the data section. Disable this feature for your application. Debugging tools emit **CS9315** after you changed string data in the data section while debugging and your app must be restarted. +To address these errors, try the following: + +- Disable the experimental data section string literals feature for your application when you encounter a hash collision (**CS9274**). This error indicates that two different string literals produced the same XXHash128 value, which prevents the optimization from working correctly, so you should remove the feature flag that enables this experimental behavior. +- Restart your application after modifying string literals during a debugging session when the data section feature is enabled (**CS9315**). The hot reload infrastructure can't update string literals stored in the data section because they're embedded in a special format that can't be modified at runtime, so continuing execution with the old string values would produce incorrect behavior. From 220686d3a486d2ad18bf5d648dccdb25debbfee9 Mon Sep 17 00:00:00 2001 From: Bill Wagner Date: Mon, 8 Dec 2025 15:57:18 -0500 Subject: [PATCH 03/11] copy edit --- .../compiler-messages/string-literal.md | 41 +++++++++---------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/docs/csharp/language-reference/compiler-messages/string-literal.md b/docs/csharp/language-reference/compiler-messages/string-literal.md index 3eec80a20854d..70559d8889108 100644 --- a/docs/csharp/language-reference/compiler-messages/string-literal.md +++ b/docs/csharp/language-reference/compiler-messages/string-literal.md @@ -1,6 +1,6 @@ --- -title: Errors and warnings for string literal declarations -description: This article helps you diagnose and correct compiler errors and warnings when you declare string literals as constants or variables. 
+title: Resolve errors and warnings for string literal declarations +description: Learn how to diagnose and correct C# compiler errors and warnings when you declare string literals, including basic strings, raw strings, and UTF-8 strings. f1_keywords: - "CS1009" - "CS1011" @@ -49,12 +49,12 @@ helpviewer_keywords: - "CS9047" - "CS9274" - "CS9315" -ms.date: 10/09/2025 +ms.date: 12/08/2025 ai-usage: ai-assisted --- -# Errors and warnings for string literal declarations +# Resolve errors and warnings for string literal declarations -There are several errors related to declaring string constants or string literals. +The C# compiler generates errors and warnings when you declare string literals with incorrect syntax or use them in unsupported contexts. These diagnostics help you identify issues with basic string literals, character literals, raw string literals, and UTF-8 string literals.