Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(markdown): improve markdown parser and diagnostics #5292

Open
wants to merge 32 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
b712073
feat(markdown_parser): add blockquote block parsing support
afonsojramos Mar 7, 2025
6cfda97
feat(markdown_parser): add code block parsing support for indented an…
afonsojramos Mar 7, 2025
3e88015
feat(markdown_parser): add header block parsing support
afonsojramos Mar 7, 2025
421f9e8
feat(markdown_parser): add HTML block parsing support
afonsojramos Mar 7, 2025
339ba33
feat(markdown_parser): add list block parsing support
afonsojramos Mar 7, 2025
184aeec
feat(markdown_parser): add paragraph block parsing support
afonsojramos Mar 7, 2025
0ef00ca
feat(markdown_parser): add table block parsing support
afonsojramos Mar 7, 2025
d5cd19b
feat(markdown_parser): improve thematic break block parsing
afonsojramos Mar 7, 2025
f0758f3
feat(markdown_parser): add document parsing method
afonsojramos Mar 7, 2025
1d511ad
feat(markdown_parser): implement comprehensive document parsing with …
afonsojramos Mar 7, 2025
5c03604
feat(markdown_parser): add header validation and improve parsing robu…
afonsojramos Mar 7, 2025
7b4fbc6
test(markdown_parser): add test case for invalid markdown headers
afonsojramos Mar 7, 2025
7f822f2
test(markdown_parser): add test cases for blockquotes, comprehensive …
afonsojramos Mar 7, 2025
ba4f600
test(markdown_parser): refactor spec tests and add more flexible test…
afonsojramos Mar 7, 2025
381d45b
feat(markdown_parser): add support for additional Markdown syntax tok…
afonsojramos Mar 7, 2025
ec95111
feat(configuration): add Markdown configuration support
afonsojramos Mar 7, 2025
4a0fdf6
feat(cli): add Markdown linter configuration support
afonsojramos Mar 7, 2025
9b3bc39
Merge remote-tracking branch 'biome/main' into feat/markdown-parser
afonsojramos Mar 7, 2025
63d17b8
refactor(markdown_parser): improve parsing robustness for various Mar…
afonsojramos Mar 7, 2025
8991695
chore: run cargo fmt
afonsojramos Mar 7, 2025
833a088
chore: remove Markdown configuration
afonsojramos Mar 10, 2025
0222689
refactor(markdown_parser): remove test files and cleanup markdown con…
afonsojramos Mar 11, 2025
cf45339
feat(markdown_parser): improve thematic break parsing with more robus…
afonsojramos Mar 12, 2025
9a29b58
refactor(markdown_parser): improve blockquote parsing with enhanced l…
afonsojramos Mar 12, 2025
135c2b5
refactor(markdown_parser): improve header block parsing with simplifi…
afonsojramos Mar 12, 2025
18b9f0c
refactor(markdown_parser): improve lexer parsing for Markdown list ma…
afonsojramos Mar 12, 2025
804f2ba
refactor(markdown_parser): enhance parsing logic for Markdown documents
afonsojramos Mar 15, 2025
b45b44a
refactor(markdown_parser): enhance header block parsing with improved…
afonsojramos Mar 15, 2025
c9743c9
test(markdown_parser): add comprehensive tests for list marker parsing
afonsojramos Mar 15, 2025
e91ec37
refactor(markdown_parser): enhance list parsing logic with improved v…
afonsojramos Mar 15, 2025
952ca45
refactor(markdown_parser): streamline paragraph block parsing logic
afonsojramos Mar 15, 2025
0505548
refactor(markdown_parser): update snapshot files for blockquotes, he…
afonsojramos Mar 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions crates/biome_cli/src/commands/lint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,14 @@ impl CommandRunner for LintCommandPayload {
.get_or_insert_with(Default::default);
graphql.linter.merge_with(self.graphql_linter.clone());
}

if self.javascript_linter.is_some() {
let javascript = fs_configuration
.javascript
.get_or_insert_with(Default::default);
javascript.linter.merge_with(self.javascript_linter.clone());
}

if self.json_linter.is_some() {
let json = fs_configuration.json.get_or_insert_with(Default::default);
json.linter.merge_with(self.json_linter.clone());
Expand Down
2 changes: 1 addition & 1 deletion crates/biome_markdown_factory/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ pub use crate::generated::MarkdownSyntaxFactory;
#[doc(hidden)]
pub use biome_markdown_syntax as syntax;

pub type DemoSyntaxTreeBuilder = TreeBuilder<'static, MarkdownLanguage, MarkdownSyntaxFactory>;
pub type MarkdownSyntaxTreeBuilder = TreeBuilder<'static, MarkdownLanguage, MarkdownSyntaxFactory>;

pub mod make;
122 changes: 122 additions & 0 deletions crates/biome_markdown_factory/src/make.rs
Original file line number Diff line number Diff line change
@@ -1 +1,123 @@
use biome_markdown_syntax::{MarkdownSyntaxKind, MarkdownSyntaxToken};

pub use crate::generated::node_factory::*;

/// Build a detached token of `kind` carrying `text`, with no leading or
/// trailing trivia attached.
fn detached(kind: MarkdownSyntaxKind, text: &str) -> MarkdownSyntaxToken {
    MarkdownSyntaxToken::new_detached(kind, text, [], [])
}

/// Declare a zero-argument factory function producing a detached token
/// with a fixed kind and fixed text.
macro_rules! fixed_token {
    ($doc:literal, $name:ident, $kind:ident, $text:literal) => {
        #[doc = $doc]
        pub fn $name() -> MarkdownSyntaxToken {
            detached(MarkdownSyntaxKind::$kind, $text)
        }
    };
}

/// Create a textual token
pub fn textual(text: &str) -> MarkdownSyntaxToken {
    detached(MarkdownSyntaxKind::MD_TEXTUAL_LITERAL, text)
}

/// Create a string token
pub fn string(text: &str) -> MarkdownSyntaxToken {
    detached(MarkdownSyntaxKind::MD_STRING_LITERAL, text)
}

/// Create a whitespace token
pub fn whitespace(text: &str) -> MarkdownSyntaxToken {
    detached(MarkdownSyntaxKind::WHITESPACE, text)
}

/// Create a digit token for ordered lists
pub fn digit(text: &str) -> MarkdownSyntaxToken {
    detached(MarkdownSyntaxKind::DIGIT, text)
}

fixed_token!("Create a hash token for headers", hash, HASH, "#");
fixed_token!("Create a backtick token", backtick, BACKTICK, "`");
fixed_token!("Create a star token for emphasis", star, STAR, "*");
fixed_token!("Create an underscore token for emphasis", underscore, UNDERSCORE, "_");
fixed_token!("Create a left bracket token", l_brack, L_BRACK, "[");
fixed_token!("Create a right bracket token", r_brack, R_BRACK, "]");
fixed_token!("Create a left parenthesis token", l_paren, L_PAREN, "(");
fixed_token!("Create a right parenthesis token", r_paren, R_PAREN, ")");
fixed_token!("Create a bang token for images", bang, BANG, "!");
fixed_token!("Create a minus token for thematic breaks", minus, MINUS, "-");
fixed_token!(
    "Create a thematic break token",
    thematic_break,
    MD_THEMATIC_BREAK_LITERAL,
    "---"
);
fixed_token!("Create a newline token", newline, NEWLINE, "\n");
fixed_token!("Create a tab token", tab, TAB, "\t");
fixed_token!(
    "Create an indent chunk token for indented code blocks",
    indent_chunk,
    MD_INDENT_CHUNK_LITERAL,
    " "
);
fixed_token!(
    "Create a hard line break token",
    hard_line_break,
    MD_HARD_LINE_LITERAL,
    " \n"
);
fixed_token!("Create a greater than token for blockquotes", greater_than, R_ANGLE, ">");
fixed_token!("Create a plus token for unordered lists", plus, PLUS, "+");
fixed_token!("Create a period token for ordered lists", period, PERIOD, ".");
fixed_token!("Create a pipe token for tables", pipe, PIPE, "|");
fixed_token!("Create a colon token for table alignment", colon, COLON, ":");
154 changes: 136 additions & 18 deletions crates/biome_markdown_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,11 @@ impl<'src> MarkdownLexer<'src> {
let dispatched = lookup_byte(current);
match dispatched {
WHS => self.consume_newline_or_whitespace(),
MUL | MIN | IDT => self.consume_thematic_break_literal(),
MUL => self.consume_star(),
MIN => self.consume_minus(),
IDT => self.consume_underscore(),
DIG => self.consume_digit(),
PLS => self.consume_plus(),
_ => self.consume_textual(),
}
}
Expand Down Expand Up @@ -269,31 +273,128 @@ impl<'src> MarkdownLexer<'src> {
TAB
}

fn consume_thematic_break_literal(&mut self) -> MarkdownSyntaxKind {
fn consume_star(&mut self) -> MarkdownSyntaxKind {
self.assert_at_char_boundary();

let start_char = match self.current_byte() {
Some(b'-') => b'-',
Some(b'*') => b'*',
Some(b'_') => b'_',
_ => return self.consume_textual(),
};
// First check if this might be a thematic break
let checkpoint = self.position;

let mut count = 0;
loop {
self.consume_whitespace();
if matches!(self.current_byte(), Some(ch) if ch == start_char) {
self.advance(1);
count += 1;
// Try to recognize thematic breaks like "***" or "* * *"
if self.is_thematic_break(b'*') {
return MD_THEMATIC_BREAK_LITERAL;
}

// Reset position after thematic break check
self.position = checkpoint;

// Check for list marker (* )
self.advance(1); // Consume the star

if matches!(self.current_byte(), Some(b' ' | b'\t')) {
// It's a list marker
return STAR;
}

// Not a special token, just a regular star
self.position = checkpoint;
self.consume_textual()
}

/// Lex a `-`: a thematic break (`---`, `- - -`), an unordered-list
/// marker (`- ` / `-\t`), or plain text.
fn consume_minus(&mut self) -> MarkdownSyntaxKind {
    self.assert_at_char_boundary();

    // Remember where the `-` run begins so we can back out.
    let start = self.position;

    // Runs such as "---" or "- - -" form a thematic break; the helper
    // consumes the whole break when it matches.
    if self.is_thematic_break(b'-') {
        return MD_THEMATIC_BREAK_LITERAL;
    }
    self.position = start;

    // A single `-` followed by a space or tab marks an unordered list item.
    self.advance(1);
    let is_list_marker = matches!(self.current_byte(), Some(b' ') | Some(b'\t'));
    if is_list_marker {
        return MINUS;
    }

    // Otherwise the `-` is ordinary text.
    self.position = start;
    self.consume_textual()
}

/// Lex a `_`: a thematic break (`___`, `_ _ _`) or plain text.
fn consume_underscore(&mut self) -> MarkdownSyntaxKind {
    self.assert_at_char_boundary();

    // Snapshot the position; runs like "___" or "_ _ _" up to the end of
    // the line are thematic breaks.
    let rewind_to = self.position;
    if self.is_thematic_break(b'_') {
        MD_THEMATIC_BREAK_LITERAL
    } else {
        // Anything else falls through to ordinary text.
        self.position = rewind_to;
        self.consume_textual()
    }
}

/// Check if the current position starts a thematic break
/// This handles patterns like "---", "***", "___" as well as "- - -", "* * *", "_ _ _"
fn is_thematic_break(&mut self, marker: u8) -> bool {
let mut marker_count = 0;
let mut pos = self.position;
let src = self.source.as_bytes();

while pos < src.len() {
if pos < src.len() && src[pos] == marker {
marker_count += 1;
pos += 1;
} else if pos < src.len() && (src[pos] == b' ' || src[pos] == b'\t') {
pos += 1;
} else {
break;
}
}
// until next newline or eof
if matches!(self.current_byte(), Some(b'\n' | b'\r') | None) && count >= 3 {
return MD_THEMATIC_BREAK_LITERAL;

// A valid thematic break must have at least 3 markers and be followed by a newline or EOF
if marker_count >= 3 && (pos >= src.len() || src[pos] == b'\n' || src[pos] == b'\r') {
// Consume the entire thematic break
self.position = pos;
return true;
}
ERROR_TOKEN

false
}

/// Lex a run of digits: an ordered-list marker (`1. ` / `1) `) or
/// plain text.
fn consume_digit(&mut self) -> MarkdownSyntaxKind {
    self.assert_at_char_boundary();

    // Remember the start so we can rewind if this is not a list marker.
    let start = self.position;

    // Greedily take the whole run of ASCII digits.
    while matches!(self.current_byte(), Some(b'0'..=b'9')) {
        self.advance(1);
    }

    // An ordered-list marker is digits followed by `.` or `)` and then
    // a space or tab.
    if matches!(self.current_byte(), Some(b'.' | b')')) {
        self.advance(1);
        if matches!(self.current_byte(), Some(b' ' | b'\t')) {
            return DIGIT;
        }
    }

    // Not a list marker: rewind and lex as ordinary text.
    self.position = start;
    self.consume_textual()
}

/// Get the UTF8 char which starts at the current byte
Expand Down Expand Up @@ -368,6 +469,23 @@ impl<'src> MarkdownLexer<'src> {
self.advance(1);
tok
}

/// Lex a `+`: an unordered-list marker (`+ ` / `+\t`) or plain text.
fn consume_plus(&mut self) -> MarkdownSyntaxKind {
    self.assert_at_char_boundary();

    let start = self.position;

    // `+` followed by a space or tab introduces an unordered list item.
    self.advance(1);
    if let Some(b' ' | b'\t') = self.current_byte() {
        return PLUS;
    }

    // Otherwise rewind and emit the `+` as ordinary text.
    self.position = start;
    self.consume_textual()
}
}

impl<'src> ReLexer<'src> for MarkdownLexer<'src> {
Expand Down
54 changes: 54 additions & 0 deletions crates/biome_markdown_parser/src/lexer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,57 @@ _ _ _ _ _ "#,
MD_THEMATIC_BREAK_LITERAL:11,
}
}

// This is a test for list markers
#[test]
fn list_markers() {
    // Lex the whole input and collect every token kind up to (excluding)
    // EOF. Shared here so the same lexing loop is not repeated per case.
    let lex = |source: &str| {
        let mut lexer = MarkdownLexer::from_str(source);
        let mut tokens = Vec::new();
        while lexer.next_token(MarkdownLexContext::default()) != EOF {
            tokens.push(lexer.current());
        }
        tokens
    };

    // Star list marker.
    let tokens = lex("* List item");
    assert_eq!(tokens[0], STAR);
    assert_eq!(tokens[1], WHITESPACE);

    // Minus list marker.
    let tokens = lex("- List item");
    assert_eq!(tokens[0], MINUS);
    assert_eq!(tokens[1], WHITESPACE);

    // Plus list marker.
    let tokens = lex("+ List item");
    assert_eq!(tokens[0], PLUS);
    assert_eq!(tokens[1], WHITESPACE);

    // The token sequence for "1. List item" should be:
    // 1. DIGIT for "1"
    // 2. MD_TEXTUAL_LITERAL for "."
    // 3. WHITESPACE for " "
    // 4+. MD_TEXTUAL_LITERAL for each letter in "List item"
    // NOTE(review): `consume_digit` advances past the `.` before returning
    // DIGIT, so whether the `.` really surfaces as a separate
    // MD_TEXTUAL_LITERAL is worth confirming against the lexer.
    let tokens = lex("1. List item");
    assert_eq!(tokens[0], DIGIT);
    assert_eq!(tokens[1], MD_TEXTUAL_LITERAL);
    assert_eq!(tokens[2], WHITESPACE);
}
Loading