// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Globalization;
using Xunit;

namespace System.Text.RegularExpressions.Tests
{
    public class RegexReductionTests
    {
        [Theory]
        // Two greedy one loops
        [InlineData("a*a*", "a*")]
        [InlineData("(a*a*)", "(a*)")]
        [InlineData("a*(?:a*)", "a*")]
        [InlineData("a*a+", "a+")]
        [InlineData("a*a?", "a*")]
        [InlineData("a*a{1,3}", "a+")]
        [InlineData("a+a*", "a+")]
        [InlineData("a+a+", "a{2,}")]
        [InlineData("a+a?", "a+")]
        [InlineData("a+a{1,3}", "a{2,}")]
        [InlineData("a?a*", "a*")]
        [InlineData("a?a+", "a+")]
        [InlineData("a?a?", "a{0,2}")]
        [InlineData("a?a{1,3}", "a{1,4}")]
        [InlineData("a{1,3}a*", "a+")]
        [InlineData("a{1,3}a+", "a{2,}")]
        [InlineData("a{1,3}a?", "a{1,4}")]
        [InlineData("a{1,3}a{1,3}", "a{2,6}")]
        // Greedy one loop and one
        [InlineData("a*a", "a+")]
        [InlineData("a+a", "a{2,}")]
        [InlineData("a?a", "a{1,2}")]
        [InlineData("a{1,3}a", "a{2,4}")]
        [InlineData("aa*", "a+")]
        [InlineData("aa+", "a{2,}")]
        [InlineData("aa?", "a{1,2}")]
        [InlineData("aa{1,3}", "a{2,4}")]
        [InlineData("aaa+b", "aa(?>a+)b")]
        [InlineData("a+aab", "(?>a{3,})b")]
        [InlineData("a{3}b", "aaab")]
        [InlineData("a{2}ab", "aaab")]
        [InlineData("aa{2}b", "aaab")]
        [InlineData("ca{3}b", "caaab")]
        [InlineData("caa{2}b", "caaab")]
        [InlineData("ca{2}ab", "caaab")]
        [InlineData("ca{2}", "caa")]
        [InlineData(@"ca{2}\w", @"caa\w")]
        // Two atomic one loops
        [InlineData("(?>a*)(?>a*)", "(?>a*)")]
        [InlineData("(?>a*)(?>(?:a*))", "(?>a*)")]
        [InlineData("(?>a*)(?>a?)", "(?>a*)")]
        [InlineData("(?>a+)(?>a*)", "(?>a+)")]
        [InlineData("(?>a+)(?>a?)", "(?>a+)")]
        [InlineData("(?>a?)(?>a*)", "(?>a*)")]
        [InlineData("(?>a?)(?>a?)", "(?>a{0,2})")]
        [InlineData("(?>a{1,3})(?>a*)", "(?>a+)")]
        [InlineData("(?>a{1,3})(?>a?)", "(?>a{1,4})")]
        // One and atomic one loop
        [InlineData("a(?>a*)", "(?>a+)")]
        [InlineData("a(?>a+)", "(?>a{2,})")]
        [InlineData("a(?>a?)", "(?>a{1,2})")]
        [InlineData("a(?>a{1,3})", "(?>a{2,4})")]
        // Two lazy one loops
        [InlineData("a*?a*?", "a*?")]
        [InlineData("a*?a+?", "a+?")]
        [InlineData("a*?a??", "a*?")]
        [InlineData("a*?a{1,3}?", "a+?")]
        [InlineData("a+?a*?", "a+?")]
        [InlineData("a+?a+?", "a{2,}?")]
        [InlineData("a+?a??", "a+?")]
        [InlineData("a+?a{1,3}?", "a{2,}?")]
        [InlineData("a??a*?", "a*?")]
        [InlineData("a??a+?", "a+?")]
        [InlineData("a??a??", "a{0,2}?")]
        [InlineData("a??a{1,3}?", "a{1,4}?")]
        [InlineData("a{1,3}?a*?", "a+?")]
        [InlineData("a{1,3}?a+?", "a{2,}?")]
        [InlineData("a{1,3}?a??", "a{1,4}?")]
        [InlineData("a{1,3}?a{1,3}?", "a{2,6}?")]
        // Lazy one loop and one
        [InlineData("a*?a", "a+?")]
        [InlineData("a+?a", "a{2,}?")]
        [InlineData("a??a", "a{1,2}?")]
        [InlineData("a{1,3}?a", "a{2,4}?")]
        [InlineData("aa*?", "a+?")]
        [InlineData("aa+?", "a{2,}?")]
        [InlineData("aa??", "a{1,2}?")]
        [InlineData("aa{1,3}?", "a{2,4}?")]
        // Two greedy notone loops
        [InlineData("[^a]*[^a]*", "[^a]*")]
        [InlineData("[^a]*[^a]+", "[^a]+")]
        [InlineData("[^a]*[^a]?", "[^a]*")]
        [InlineData("[^a]*[^a]{1,3}", "[^a]+")]
        [InlineData("[^a]+[^a]*", "[^a]+")]
        [InlineData("[^a]+[^a]+", "[^a]{2,}")]
        [InlineData("[^a]+[^a]?", "[^a]+")]
        [InlineData("[^a]+[^a]{1,3}", "[^a]{2,}")]
        [InlineData("[^a]?[^a]*", "[^a]*")]
        [InlineData("[^a]?[^a]+", "[^a]+")]
        [InlineData("[^a]?[^a]?", "[^a]{0,2}")]
        [InlineData("[^a]?[^a]{1,3}", "[^a]{1,4}")]
        [InlineData("[^a]{1,3}[^a]*", "[^a]+")]
        [InlineData("[^a]{1,3}[^a]+", "[^a]{2,}")]
        [InlineData("[^a]{1,3}[^a]?", "[^a]{1,4}")]
        [InlineData("[^a]{1,3}[^a]{1,3}", "[^a]{2,6}")]
        // Two lazy notone loops
        [InlineData("[^a]*?[^a]*?", "[^a]*?")]
        [InlineData("[^a]*?[^a]+?", "[^a]+?")]
        [InlineData("[^a]*?[^a]??", "[^a]*?")]
        [InlineData("[^a]*?[^a]{1,3}?", "[^a]+?")]
        [InlineData("[^a]+?[^a]*?", "[^a]+?")]
        [InlineData("[^a]+?[^a]+?", "[^a]{2,}?")]
        [InlineData("[^a]+?[^a]??", "[^a]+?")]
        [InlineData("[^a]+?[^a]{1,3}?", "[^a]{2,}?")]
        [InlineData("[^a]??[^a]*?", "[^a]*?")]
        [InlineData("[^a]??[^a]+?", "[^a]+?")]
        [InlineData("[^a]??[^a]??", "[^a]{0,2}?")]
        [InlineData("[^a]??[^a]{1,3}?", "[^a]{1,4}?")]
        [InlineData("[^a]{1,3}?[^a]*?", "[^a]+?")]
        [InlineData("[^a]{1,3}?[^a]+?", "[^a]{2,}?")]
        [InlineData("[^a]{1,3}?[^a]??", "[^a]{1,4}?")]
        [InlineData("[^a]{1,3}?[^a]{1,3}?", "[^a]{2,6}?")]
        // Two atomic notone loops
        [InlineData("(?>[^a]*)(?>[^a]*)", "(?>[^a]*)")]
        [InlineData("(?>[^a]*)(?>[^a]?)", "(?>[^a]*)")]
        [InlineData("(?>[^a]+)(?>[^a]*)", "(?>[^a]+)")]
        [InlineData("(?>[^a]+)(?>[^a]?)", "(?>[^a]+)")]
        [InlineData("(?>[^a]?)(?>[^a]*)", "(?>[^a]*)")]
        [InlineData("(?>[^a]?)(?>[^a]?)", "(?>[^a]{0,2})")]
        [InlineData("(?>[^a]{1,3})(?>[^a]*)", "(?>[^a]+)")]
        [InlineData("(?>[^a]{1,3})(?>[^a]?)", "(?>[^a]{1,4})")]
        // Greedy notone loop and notone
        [InlineData("[^a]*[^a]", "[^a]+")]
        [InlineData("[^a]+[^a]", "[^a]{2,}")]
        [InlineData("[^a]?[^a]", "[^a]{1,2}")]
        [InlineData("[^a]{1,3}[^a]", "[^a]{2,4}")]
        [InlineData("[^a][^a]*", "[^a]+")]
        [InlineData("[^a][^a]+", "[^a]{2,}")]
        [InlineData("[^a][^a]?", "[^a]{1,2}")]
        [InlineData("[^a][^a]{1,3}", "[^a]{2,4}")]
        // Lazy notone loop and notone
        [InlineData("[^a]*?[^a]", "[^a]+?")]
        [InlineData("[^a]+?[^a]", "[^a]{2,}?")]
        [InlineData("[^a]??[^a]", "[^a]{1,2}?")]
        [InlineData("[^a]{1,3}?[^a]", "[^a]{2,4}?")]
        [InlineData("[^a][^a]*?", "[^a]+?")]
        [InlineData("[^a][^a]+?", "[^a]{2,}?")]
        [InlineData("[^a][^a]??", "[^a]{1,2}?")]
        [InlineData("[^a][^a]{1,3}?", "[^a]{2,4}?")]
        // Notone and atomic notone loop
        [InlineData("[^a](?>[^a]*)", "(?>[^a]+)")]
        [InlineData("[^a](?>[^a]+)", "(?>[^a]{2,})")]
        [InlineData("[^a](?>[^a]?)", "(?>[^a]{1,2})")]
        [InlineData("[^a](?>[^a]{1,3})", "(?>[^a]{2,4})")]
        // Notone and notone
        [InlineData("[^a][^a]", "[^a]{2}")]
        // Two greedy set loops
        [InlineData("[0-9]*[0-9]*", "[0-9]*")]
        [InlineData("[0-9]*[0-9]+", "[0-9]+")]
        [InlineData("[0-9]*[0-9]?", "[0-9]*")]
        [InlineData("[0-9]*[0-9]{1,3}", "[0-9]+")]
        [InlineData("[0-9]+[0-9]*", "[0-9]+")]
        [InlineData("[0-9]+[0-9]+", "[0-9]{2,}")]
        [InlineData("[0-9]+[0-9]?", "[0-9]+")]
        [InlineData("[0-9]+[0-9]{1,3}", "[0-9]{2,}")]
        [InlineData("[0-9]?[0-9]*", "[0-9]*")]
        [InlineData("[0-9]?[0-9]+", "[0-9]+")]
        [InlineData("[0-9]?[0-9]?", "[0-9]{0,2}")]
        [InlineData("[0-9]?[0-9]{1,3}", "[0-9]{1,4}")]
        [InlineData("[0-9]{1,3}[0-9]*", "[0-9]+")]
        [InlineData("[0-9]{1,3}[0-9]+", "[0-9]{2,}")]
        [InlineData("[0-9]{1,3}[0-9]?", "[0-9]{1,4}")]
        [InlineData("[0-9]{1,3}[0-9]{1,3}", "[0-9]{2,6}")]
        // Greedy set loop and set
        [InlineData("[0-9]*[0-9]", "[0-9]+")]
        [InlineData("[0-9]+[0-9]", "[0-9]{2,}")]
        [InlineData("[0-9]?[0-9]", "[0-9]{1,2}")]
        [InlineData("[0-9]{1,3}[0-9]", "[0-9]{2,4}")]
        [InlineData("[0-9][0-9]*", "[0-9]+")]
        [InlineData("[0-9][0-9]+", "[0-9]{2,}")]
        [InlineData("[0-9][0-9]?", "[0-9]{1,2}")]
        [InlineData("[0-9][0-9]{1,3}", "[0-9]{2,4}")]
        // Set and atomic set loop
        [InlineData("[0-9](?>[0-9]*)", "(?>[0-9]+)")]
        [InlineData("[0-9](?>[0-9]+)", "(?>[0-9]{2,})")]
        [InlineData("[0-9](?>[0-9]?)", "(?>[0-9]{1,2})")]
        [InlineData("[0-9](?>[0-9]{1,3})", "(?>[0-9]{2,4})")]
        // Two lazy set loops
        [InlineData("[0-9]*?[0-9]*?", "[0-9]*?")]
        [InlineData("[0-9]*?[0-9]+?", "[0-9]+?")]
        [InlineData("[0-9]*?[0-9]??", "[0-9]*?")]
        [InlineData("[0-9]*?[0-9]{1,3}?", "[0-9]+?")]
        [InlineData("[0-9]+?[0-9]*?", "[0-9]+?")]
        [InlineData("[0-9]+?[0-9]+?", "[0-9]{2,}?")]
        [InlineData("[0-9]+?[0-9]??", "[0-9]+?")]
        [InlineData("[0-9]+?[0-9]{1,3}?", "[0-9]{2,}?")]
        [InlineData("[0-9]??[0-9]*?", "[0-9]*?")]
        [InlineData("[0-9]??[0-9]+?", "[0-9]+?")]
        [InlineData("[0-9]??[0-9]??", "[0-9]{0,2}?")]
        [InlineData("[0-9]??[0-9]{1,3}?", "[0-9]{1,4}?")]
        [InlineData("[0-9]{1,3}?[0-9]*?", "[0-9]+?")]
        [InlineData("[0-9]{1,3}?[0-9]+?", "[0-9]{2,}?")]
        [InlineData("[0-9]{1,3}?[0-9]??", "[0-9]{1,4}?")]
        [InlineData("[0-9]{1,3}?[0-9]{1,3}?", "[0-9]{2,6}?")]
        // Two atomic set loops
        [InlineData("(?>[0-9]*)(?>[0-9]*)", "(?>[0-9]*)")]
        [InlineData("(?>[0-9]*)(?>[0-9]?)", "(?>[0-9]*)")]
        [InlineData("(?>[0-9]+)(?>[0-9]*)", "(?>[0-9]+)")]
        [InlineData("(?>[0-9]+)(?>[0-9]?)", "(?>[0-9]+)")]
        [InlineData("(?>[0-9]?)(?>[0-9]*)", "(?>[0-9]*)")]
        [InlineData("(?>[0-9]?)(?>[0-9]?)", "(?>[0-9]{0,2})")]
        [InlineData("(?>[0-9]{1,3})(?>[0-9]*)", "(?>[0-9]+)")]
        [InlineData("(?>[0-9]{1,3})(?>[0-9]?)", "(?>[0-9]{1,4})")]
        // Lazy set loop and set
        [InlineData("[0-9]*?[0-9]", "[0-9]+?")]
        [InlineData("[0-9]+?[0-9]", "[0-9]{2,}?")]
        [InlineData("[0-9]??[0-9]", "[0-9]{1,2}?")]
        [InlineData("[0-9]{1,3}?[0-9]", "[0-9]{2,4}?")]
        [InlineData("[0-9][0-9]*?", "[0-9]+?")]
        [InlineData("[0-9][0-9]+?", "[0-9]{2,}?")]
        [InlineData("[0-9][0-9]??", "[0-9]{1,2}?")]
        [InlineData("[0-9][0-9]{1,3}?", "[0-9]{2,4}?")]
        // Set and set
        [InlineData("[ace][ace]", "[ace]{2}")]
        // Set and one
        [InlineData("[a]", "a")]
        [InlineData("[a]*", "a*")]
        [InlineData("(?>[a]*)", "(?>a*)")]
        [InlineData("[a]*?", "a*?")]
        // Set and notone
        [InlineData("[^\n]", ".")]
        [InlineData("[^\n]*", ".*")]
        [InlineData("(?>[^\n]*)", "(?>.*)")]
        [InlineData("[^\n]*?", ".*?")]
        // Set reduction
        [InlineData("[\u0001-\uFFFF]", "[^\u0000]")]
        [InlineData("[\u0000-\uFFFE]", "[^\uFFFF]")]
        [InlineData("[\u0000-AB-\uFFFF]", "[\u0000-\uFFFF]")]
        [InlineData("[ABC-EG-J]", "[A-EG-J]")]
        [InlineData("[\u0000-AC-\uFFFF]", "[^B]")]
        [InlineData("[\u0000-AF-\uFFFF]", "[^B-E]")]
        // Large loop patterns
        [InlineData("a*a*a*a*a*a*a*b*b*?a+a*", "a*b*b*?a+")]
        [InlineData("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "a{0,30}aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")]
        // Group elimination
        [InlineData("(?:(?:(?:(?:(?:(?:a*))))))", "a*")]
        // Nested loops
        [InlineData("(?:a*)*", "a*")]
        [InlineData("(?:a*)+", "a*")]
        [InlineData("(?:a+){4}", "a{4,}")]
        [InlineData("(?:a{1,2}){4}", "a{4,8}")]
        // Nested atomic
        [InlineData("(?>(?>(?>(?>abc*))))", "(?>ab(?>c*))")]
        [InlineData("(?>(?>(?>(?>))))", "")]
        [InlineData("(?>(?>(?>(?>(?!)))))", "(?!)")]
        [InlineData("(?=(?>))", "")]
        // Alternation reduction
        [InlineData("a|b", "[ab]")]
        [InlineData("a|b|c|d|e|g|h|z", "[a-eghz]")]
        [InlineData("a|b|c|def|g|h", "(?>[a-c]|def|[gh])")]
        [InlineData("this|that|there|then|those", "th(?>is|at|ere|en|ose)")]
        [InlineData("it's (?>this|that|there|then|those)", "it's (?>th(?>is|at|e(?>re|n)|ose))")]
        [InlineData("it's (?>this|that|there|then|those)!", "it's (?>th(?>is|at|e(?>re|n)|ose))!")]
        [InlineData("abcd|abce", "abc[de]")]
        [InlineData("abcd|abef", "ab(?>cd|ef)")]
        [InlineData("abcd|aefg", "a(?>bcd|efg)")]
        [InlineData("abcd|abc|ab|a", "a(?>bcd|bc|b|)")]
        // [InlineData("abcde|abcdef", "abcde(?>|f)")] // TODO https://github.com/dotnet/runtime/issues/66031: Need to reorganize optimizations to avoid an extra Empty being left at the end of the tree
        [InlineData("abcdef|abcde", "abcde(?>f|)")]
        [InlineData("abcdef|abcdeg|abcdeh|abcdei|abcdej|abcdek|abcdel", "abcde[f-l]")]
        [InlineData("(ab|ab*)bc", "(a(?:b|b*))bc")]
        [InlineData("abc(?:defgh|defij)klmn", "abcdef(?:gh|ij)klmn")]
        [InlineData("abc(defgh|defij)klmn", "abc(def(?:gh|ij))klmn")]
        [InlineData("a[b-f]|a[g-k]", "a[b-k]")]
        [InlineData("this|this", "this")]
        [InlineData("this|this|this", "this")]
        [InlineData("hello there|hello again|hello|hello|hello|hello", "hello(?> there| again|)")]
        [InlineData("hello there|hello again|hello|hello|hello|hello|hello world", "hello(?> there| again|)")]
        [InlineData("hello there|hello again|hello|hello|hello|hello|hello world|hello", "hello(?> there| again|)")]
        [InlineData("ab|cd|||ef", "ab|cd|")]
        [InlineData("|ab|cd|e||f", "")]
        [InlineData("ab|cd|||||||||||ef", "ab|cd|")]
        [InlineData("ab|cd|||||||||||e||f|||", "ab|cd|")]
        [InlineData("ab|cd|(?!)|ef", "ab|cd|ef")]
        [InlineData("abcd(?:(?i:e)|(?i:f))", "abcd(?i:[ef])")]
        [InlineData("(?i:abcde)|(?i:abcdf)", "(?i:abcd[ef])")]
        [InlineData("xyz(?:(?i:abcde)|(?i:abcdf))", "xyz(?i:abcd[ef])")]
        [InlineData("bonjour|hej|ciao|shalom|zdravo|pozdrav|hallo|hola|hello|hey|witam|tere|bonjou|salam|helo|sawubona", "(?>bonjou(?>r|)|h(?>e(?>j|(?>l(?>lo|o)|y))|allo|ola)|ciao|s(?>halom|a(?>lam|wubona))|zdravo|pozdrav|witam|tere)")]
        [InlineData("\\w\\d123|\\w\\dabc", "\\w\\d(?:123|abc)")]
        [InlineData("(a)(?(1)b)", "(a)(?(1)b|)")]
        [InlineData("(abc)(?(1)def)", "(abc)(?(1)def|)")]
        [InlineData("(?(a)a)", "(?(a)a|)")]
        [InlineData("(?(abc)def)", "(?(abc)def|)")]
        [InlineData("(?(\\w)\\d)", "(?(\\w)\\d|)")]
        // Loops inside alternation constructs
        [InlineData("(abc*|def)ghi", "(ab(?>c*)|def)ghi")]
        [InlineData("(abc|def*)ghi", "(abc|de(?>f*))ghi")]
        [InlineData("(abc*|def*)ghi", "(ab(?>c*)|de(?>f*))ghi")]
        [InlineData("(abc*|def*)", "(ab(?>c*)|de(?>f*))")]
        [InlineData("(?(\\w)abc*|def*)ghi", "(?(\\w)ab(?>c*)|de(?>f*))ghi")]
        [InlineData("(?(\\w)abc*|def*)", "(?(\\w)ab(?>c*)|de(?>f*))")]
        [InlineData("(?(xyz*)abc|def)", "(?(xy(?>z*))abc|def)")]
        [InlineData("(?(xyz*)abc|def)\\w", "(?(xy(?>z*))abc|def)\\w")]
        // Loops followed by alternation constructs
        [InlineData("a*(bcd|efg)", "(?>a*)(bcd|efg)")]
        [InlineData("a*(?(xyz)bcd|efg)", "(?>a*)(?(xyz)bcd|efg)")]
        // Auto-atomicity
        [InlineData("a*b", "(?>a*)b")]
        [InlineData("a*b+", "(?>a*)(?>b+)")]
        [InlineData("a*b*", "(?>a*)(?>b*)")]
        [InlineData("a*b+c*", "(?>a*)(?>b+)(?>c*)")]
        [InlineData("a*b*c*", "(?>a*)(?>b*)(?>c*)")]
        [InlineData("a*b*c*|d*[ef]*", "(?>a*)(?>b*)(?>c*)|(?>d*)(?>[ef]*)")]
        [InlineData("(a*)(b*)(c*)", "((?>a*))((?>b*))((?>c*))")]
        [InlineData("a*b{3,4}", "(?>a*)(?>b{3,4})")]
        [InlineData("[ab]*[^a]*", "[ab]*(?>[^a]*)")]
        [InlineData("[aa]*[^a]*", "(?>a*)(?>[^a]*)")]
        [InlineData("a??", "")]
        [InlineData("ab?c", "a(?>b?)c")]
        [InlineData("ab??c", "a(?>b?)c")]
        [InlineData("ab{2}?c", "abbc")]
        [InlineData("ab{2,3}?c", "a(?>b{2,3})c")]
        //[InlineData("(abc*?)", "(ab)")] // TODO https://github.com/dotnet/runtime/issues/66031: Need to reorganize optimizations to avoid an extra Empty being left at the end of the tree
        [InlineData("a{1,3}?", "a{1,4}?")]
        [InlineData("a{2,3}?", "a{2}")]
        [InlineData("bc(a){1,3}?", "bc(a){1,2}?")]
        [InlineData("c{3,}?|f{2,}?", "c{3}|f{2}")]
        [InlineData("[a-z]*[\x0000-\xFFFF]+", "[a-z]*(?>[\x0000-\xFFFF]+)")]
        [InlineData("a+b", "(?>a+)b")]
        [InlineData("a?b", "(?>a?)b")]
        [InlineData("[^\n]*\n", "(?>[^\n]*)\n")]
        [InlineData("[^\n]*\n+", "(?>[^\n]*)(?>\n+)")]
        [InlineData("(a+)b", "((?>a+))b")]
        [InlineData("a*(?:bcd|efg)", "(?>a*)(?:bcd|efg)")]
        [InlineData("\\w+\\b", "(?>\\w+)\\b")]
        [InlineData("\\d+\\b", "(?>\\d+)\\b")]
        [InlineData("\\W+\\B", "(?>\\W+)\\B")]
        [InlineData("\\D+\\B", "(?>\\D+)\\B")]
        [InlineData("(?:abc*|def*)g", "(?:ab(?>c*)|de(?>f*))g")]
        [InlineData("(?:a[ce]*|b*)g", "(?:a(?>[ce]*)|(?>b*))g")]
        [InlineData("(?:a[ce]*|b*)c", "(?:a[ce]*|(?>b*))c")]
        [InlineData("apple|(?:orange|pear)|grape", "apple|orange|pear|grape")]
        [InlineData("(?:abc)*", "(?>(?>(?>(?:abc)*)))")]
        [InlineData("(?:w*)+", "(?>(?>w*)+)")]
        [InlineData("(?:w*)+\\.", "(?>w*)+\\.")]
        [InlineData("(a[bcd]e*)*fg", "(a[bcd](?>e*))*fg")]
        [InlineData("(\\w[bcd]\\s*)*fg", "(\\w[bcd](?>\\s*))*fg")]
        [InlineData(@"\b(\w+)\b", @"\b((?>\w+))\b")]
        [InlineData(@"\b(?:\w+)\b ", @"\b(?>\w+)\b ")]
        // Nothing handling
        [InlineData(@"\wabc(?!)def", "(?!)")]
        [InlineData(@"\wabc(?!)def|ghi(?!)", "(?!)")]
        // IgnoreCase set creation
        [InlineData("(?i)abcd", "[Aa][Bb][Cc][Dd]")]
        [InlineData("(?i)abcd|efgh", "[Aa][Bb][Cc][Dd]|[Ee][Ff][Gg][Hh]")]
        [InlineData("(?i)a|b", "[AaBb]")]
        [InlineData("(?i)[abcd]", "[AaBbCcDd]")]
        [InlineData("(?i)[acexyz]", "[AaCcEeXxYyZz]")]
        [InlineData("(?i)\\w", "\\w")]
        [InlineData("(?i)\\d", "\\d")]
        [InlineData("(?i).", ".")]
        [InlineData("(?i)\\$", "\\$")]
        public void PatternsReduceIdentically(string actual, string expected)
        {
            // Each pattern is parsed with RegexOptions.None and the invariant culture, and the string form of the
            // resulting root node is captured. The test passes only when the two string forms are the same.
            // NOTE: RegexNode.ToString is only compiled into debug builds, so DEBUG is currently set on the unit tests project.
            RegexTree actualTree = RegexParser.Parse(actual, RegexOptions.None, CultureInfo.InvariantCulture);
            string actualDump = actualTree.Root.ToString();

            RegexTree expectedTree = RegexParser.Parse(expected, RegexOptions.None, CultureInfo.InvariantCulture);
            string expectedDump = expectedTree.Root.ToString();

            if (string.Equals(actualDump, expectedDump, StringComparison.Ordinal))
            {
                return;
            }

            // Throw the xunit equality failure directly, passing both node dumps.
            throw Xunit.Sdk.EqualException.ForMismatchedValues(expectedDump, actualDump);
        }

        [Theory]
        // Not coalescing loops
        [InlineData("a[^a]", "a{2}")]
        [InlineData("[^a]a", "[^a]{2}")]
        [InlineData("a*b*", "a*")]
        [InlineData("a*b*", "b*")]
        [InlineData("[^a]*[^b]", "[^a]*")]
        [InlineData("[ace]*[acd]", "[ace]*")]
        [InlineData("a+b+", "a+")]
        [InlineData("a+b+", "b+")]
        [InlineData("a*(a*)", "a*")]
        [InlineData("(a*)a*", "a*")]
        [InlineData("a*(?>a*)", "a*")]
        [InlineData("a*a*?", "a*")]
        [InlineData("a*?a*", "a*")]
        [InlineData("a*[^a]*", "a*")]
        [InlineData("[^a]*a*", "a*")]
        [InlineData("(?>a*)(?>a+)", "(?>a+)")]
        [InlineData("(?>a*)(?>a{1,3})", "(?>a+)")]
        [InlineData("(?>a+)(?>a+)", "(?>a{2,})")]
        [InlineData("(?>a+)(?>a{1,3})", "(?>a{2,})")]
        [InlineData("(?>a?)(?>a+)", "(?>a+)")]
        [InlineData("(?>a?)(?>a{1,3})", "(?>a{1,4})")]
        [InlineData("(?>a{1,3})(?>a+)", "(?>a{2,})")]
        [InlineData("(?>a{1,3})(?>a{1,3})", "(?>a{2,6})")]
        [InlineData("(?>[^a]*)(?>[^a]+)", "(?>[^a]+)")]
        [InlineData("(?>[^a]*)(?>[^a]{1,3})", "(?>[^a]+)")]
        [InlineData("(?>[^a]+)(?>[^a]+)", "(?>[^a]{2,})")]
        [InlineData("(?>[^a]+)(?>[^a]{1,3})", "(?>[^a]{2,})")]
        [InlineData("(?>[^a]?)(?>[^a]+)", "(?>[^a]+)")]
        [InlineData("(?>[^a]?)(?>[^a]{1,3})", "(?>[^a]{1,4})")]
        [InlineData("(?>[^a]{1,3})(?>[^a]+)", "(?>[^a]{2,})")]
        [InlineData("(?>[^a]{1,3})(?>[^a]{1,3})", "(?>[^a]{2,6})")]
        [InlineData("(?>[0-9]*)(?>[0-9]+)", "(?>[0-9]+)")]
        [InlineData("(?>[0-9]*)(?>[0-9]{1,3})", "(?>[0-9]+)")]
        [InlineData("(?>[0-9]+)(?>[0-9]+)", "(?>[0-9]{2,})")]
        [InlineData("(?>[0-9]+)(?>[0-9]{1,3})", "(?>[0-9]{2,})")]
        [InlineData("(?>[0-9]?)(?>[0-9]+)", "(?>[0-9]+)")]
        [InlineData("(?>[0-9]?)(?>[0-9]{1,3})", "(?>[0-9]{1,4})")]
        [InlineData("(?>[0-9]{1,3})(?>[0-9]+)", "(?>[0-9]{2,})")]
        [InlineData("(?>[0-9]{1,3})(?>[0-9]{1,3})", "(?>[0-9]{2,6})")]
        [InlineData("(?>a*)a", "(?>a+)")]
        [InlineData("(?>a+)a", "(?>a{2,})")]
        [InlineData("(?>a?)a", "(?>a{1,2})")]
        [InlineData("(?>a{1,3})a", "(?>a{2,4})")]
        [InlineData("(?>[^a]*)[^a]", "(?>[^a]+)")]
        [InlineData("(?>[^a]+)[^a]", "(?>[^a]{2,})")]
        [InlineData("(?>[^a]?)[^a]", "(?>[^a]{1,2})")]
        [InlineData("(?>[^a]{1,3})[^a]", "(?>[^a]{2,4})")]
        [InlineData("(?>[0-9]*)[0-9]", "(?>[0-9]+)")]
        [InlineData("(?>[0-9]+)[0-9]", "(?>[0-9]{2,})")]
        [InlineData("(?>[0-9]?)[0-9]", "(?>[0-9]{1,2})")]
        [InlineData("(?>[0-9]{1,3})[0-9]", "(?>[0-9]{2,4})")]
        [InlineData("a{2147483646}a", "a{2147483647}")]
        [InlineData("a{2147483647}a", "a{2147483647}")]
        [InlineData("a{0,2147483646}a", "a{0,2147483647}")]
        [InlineData("aa{2147483646}", "a{2147483647}")]
        [InlineData("aa{0,2147483646}", "a{0,2147483647}")]
        [InlineData("a{2147482647}a{1000}", "a{2147483647}")]
        [InlineData("a{0,2147482647}a{0,1000}", "a{0,2147483647}")]
        [InlineData("[^a]{2147483646}[^a]", "[^a]{2147483647}")]
        [InlineData("[^a]{2147483647}[^a]", "[^a]{2147483647}")]
        [InlineData("[^a]{0,2147483646}[^a]", "[^a]{0,2147483647}")]
        [InlineData("[^a][^a]{2147483646}", "[^a]{2147483647}")]
        [InlineData("[^a][^a]{0,2147483646}", "[^a]{0,2147483647}")]
        [InlineData("[^a]{2147482647}[^a]{1000}", "[^a]{2147483647}")]
        [InlineData("[^a]{0,2147482647}[^a]{0,1000}", "[^a]{0,2147483647}")]
        [InlineData("[ace]{2147483646}[ace]", "[ace]{2147483647}")]
        [InlineData("[ace]{2147483647}[ace]", "[ace]{2147483647}")]
        [InlineData("[ace]{0,2147483646}[ace]", "[ace]{0,2147483647}")]
        [InlineData("[ace][ace]{2147483646}", "[ace]{2147483647}")]
        [InlineData("[ace][ace]{0,2147483646}", "[ace]{0,2147483647}")]
        [InlineData("[ace]{2147482647}[ace]{1000}", "[ace]{2147483647}")]
        [InlineData("[ace]{0,2147482647}[ace]{0,1000}", "[ace]{0,2147483647}")]
        // Not reducing branches of alternations with different casing
        [InlineData("(?i:abcd)|abcd", "abcd|abcd")]
        [InlineData("abcd|(?i:abcd)", "abcd|abcd")]
        // Not applying auto-atomicity
        [InlineData(@"(a*|b*)\w*", @"((?>a*)|(?>b*))\w*")]
        [InlineData("[ab]*[^a]", "(?>[ab]*)[^a]")]
        [InlineData("[ab]*[^a]*", "(?>[ab]*)[^a]*")]
        [InlineData("[ab]*[^a]*?", "(?>[ab]*)[^a]*?")]
        [InlineData("[ab]*(?>[^a]*)", "(?>[ab]*)(?>[^a]*)")]
        [InlineData("[^\n]*\n*", "(?>[^\n]*)\n")]
        [InlineData("(a[bcd]a*)*fg", "(a[bcd](?>a*))*fg")]
        [InlineData("(\\w[bcd]\\d*)*fg", "(\\w[bcd](?>\\d*))*fg")]
        [InlineData("a*(?<=[^a])b", "(?>a*)(?<=[^a])b")]
        [InlineData("[\x0000-\xFFFF]*[a-z]", "(?>[\x0000-\xFFFF]*)[a-z]")]
        [InlineData("[a-z]*[\x0000-\xFFFF]+", "(?>[a-z]*)[\x0000-\xFFFF]+")]
        [InlineData("[^a-c]*[e-g]", "(?>[^a-c]*)[e-g]")]
        [InlineData("[^a-c]*[^e-g]", "(?>[^a-c]*)[^e-g]")]
        [InlineData("(w+)+", "((?>w+))+")]
        [InlineData("(w{1,2})+", "((?>w{1,2}))+")]
        [InlineData("(?:ab|cd|ae)f", "(?>ab|cd|ae)f")]
        [InlineData("ab?(b)", "a(?>b?)(b)")]
        [InlineData("ab??c?", "a(?>b??)c?")]
        [InlineData("ab{2,3}?c?", "a(?>b{2,3}?)c?")]
        [InlineData("(?:ab??){2}", "(?:a(?>b??)){2}")]
        [InlineData("(?:ab??){2, 3}", "(?:a(?>b??)){2, 3}")]
        [InlineData("ab??(b)", "a(?>b??)(b)")]
        [InlineData(@"\w+\b\w+", @"(?>\w+)\b\w")]
        [InlineData(@"\w*\b\w+", @"(?>\w*)\b\w+")]
        [InlineData(@"\W+\B\W+", @"(?>\W+)\B\W")]
        [InlineData(@"\W*\B\W+", @"(?>\W*)\B\W")]
        [InlineData(@"a?\b", @"(?>a?)\b")]
        [InlineData(@"\w*\b", @"(?>\w*)\b")]
        [InlineData(@"\d*\b", @"(?>\d*)\b")]
        [InlineData(@"\W*\B", @"(?>\W*)\B")]
        [InlineData(@"\D*\B", @"(?>\D*)\B")]
        // Loops inside alternation constructs
        [InlineData("(abc*|def)chi", "(ab(?>c*)|def)chi")]
        [InlineData("(abc|def*)fhi", "(abc|de(?>f*))fhi")]
        [InlineData("(abc*|def*)\\whi", "(ab(?>c*)|de(?>f*))\\whi")]
        [InlineData("(?(\\w)abc*|def*)\\whi", "(?(\\w)ab(?>c*)|de(?>f*))\\whi")]
        // Loops followed by alternation constructs
        [InlineData("a*(bcd|afg)", "(?>a*)(bcd|afg)")]
        [InlineData("(a*)(?(1)bcd|efg)", "((?>a*))(?(1)bcd|efg)")]
        [InlineData("a*(?(abc)bcd|efg)", "(?>a*)(?(abc)bcd|efg)")]
        [InlineData("a*(?(xyz)acd|efg)", "(?>a*)(?(xyz)acd|efg)")]
        [InlineData("a*(?(xyz)bcd|afg)", "(?>a*)(?(xyz)bcd|afg)")]
        [InlineData("a*(?(xyz)bcd)", "(?>a*)(?(xyz)bcd)")]
        public void PatternsReduceDifferently(string actual, string expected)
        {
            // Each pattern is parsed with RegexOptions.None and the invariant culture, and the string form of the
            // resulting root node is captured. The test passes only when the two string forms are NOT the same.
            // NOTE: RegexNode.ToString is only compiled into debug builds, so DEBUG is currently set on the unit tests project.
            RegexTree actualTree = RegexParser.Parse(actual, RegexOptions.None, CultureInfo.InvariantCulture);
            string actualDump = actualTree.Root.ToString();

            RegexTree expectedTree = RegexParser.Parse(expected, RegexOptions.None, CultureInfo.InvariantCulture);
            string expectedDump = expectedTree.Root.ToString();

            if (!string.Equals(actualDump, expectedDump, StringComparison.Ordinal))
            {
                return;
            }

            // Identical dumps are a failure for this test; throw the xunit inequality failure directly.
            throw Xunit.Sdk.NotEqualException.ForEqualValues(expectedDump, actualDump);
        }

        [Theory]
        [InlineData(@"a", 0, 1, 1)]
        [InlineData(@"[^a]", 0, 1, 1)]
        [InlineData(@"[abcdefg]", 0, 1, 1)]
        [InlineData(@"abcd", 0, 4, 4)]
        [InlineData(@"a*", 0, 0, null)]
        [InlineData(@"a*?", 0, 0, null)]
        [InlineData(@"a?", 0, 0, 1)]
        [InlineData(@"a??", 0, 0, 1)]
        [InlineData(@"a+", 0, 1, null)]
        [InlineData(@"a+?", 0, 1, null)]
        [InlineData(@"(?>a*)a", 0, 1, null)]
        [InlineData(@"(?>a*)a+", 0, 1, null)]
        [InlineData(@"(?>a*)a*", 0, 0, null)]
        [InlineData(@"a{2}", 0, 2, 2)]
        [InlineData(@"a{2}?", 0, 2, 2)]
        [InlineData(@"a{3,17}", 0, 3, 17)]
        [InlineData(@"a{3,17}?", 0, 3, 17)]
        [InlineData(@"[^a]{3,17}", 0, 3, 17)]
        [InlineData(@"[^a]{3,17}?", 0, 3, 17)]
        [InlineData(@"(abcd){5}", 0, 20, 20)]
        [InlineData(@"(abcd|ef){2,6}", 0, 4, 24)]
        [InlineData(@"abcef|de", 0, 2, 5)]
        [InlineData(@"abc(def|ghij)k", 0, 7, 8)]
        [InlineData(@"abc(def|ghij|k||lmnopqrs|t)u", 0, 4, 12)]
        [InlineData(@"(ab)c(def|ghij|k|l|\1|m)n", 0, 4, null)]
        [InlineData(@"abc|de*f|ghi", 0, 2, null)]
        [InlineData(@"abc|de+f|ghi", 0, 3, null)]
        [InlineData(@"abc|(def)+|ghi", 0, 3, null)]
        [InlineData(@"(abc)+|def", 0, 3, null)]
        [InlineData(@"\d{1,2}-\d{1,2}-\d{2,4}", 0, 6, 10)]
        [InlineData(@"\d{1,2}-(?>\d{1,2})-\d{2,4}", 0, 6, 10)]
        [InlineData(@"1(?=9)\d", 0, 2, 2)]
        [InlineData(@"1(?!\d)\w", 0, 2, 2)]
        [InlineData(@"a*a*a*a*a*a*a*b*", 0, 0, null)]
        [InlineData(@"((a{1,2}){4}){3,7}", 0, 12, 56)]
        [InlineData(@"((a{1,2}){4}?){3,7}", 0, 12, 56)]
        [InlineData(@"\b\w{4}\b", 0, 4, 4)]
        [InlineData(@"\b\w{4}\b", (int)RegexOptions.ECMAScript, 4, 4)]
        [InlineData(@"abcd(?=efgh)efgh", 0, 8, 8)]
        [InlineData(@"abcd(?<=cd)efgh", 0, 8, 8)]
        [InlineData(@"abcd(?!ab)efgh", 0, 8, 8)]
        [InlineData(@"abcd(?<!ef)efgh", 0, 8, 8)]
        [InlineData(@"(a{1073741824}){2}", 0, 2147483646, null)] // min length max is bound to int.MaxValue - 1 for convenience in other places where we need to be able to add 1 without risk of overflow
        [InlineData(@"a{1073741824}b{1073741824}", 0, 2147483646, null)]
        [InlineData(@"((((((((((((((((((((((((((((((ab|cd+)|ef+)|gh+)|ij+)|kl+)|mn+)|op+)|qr+)|st+)|uv+)|wx+)|yz+)|01+)|23+)|45+)|67+)|89+)|AB+)|CD+)|EF+)|GH+)|IJ+)|KL+)|MN+)|OP+)|QR+)|ST+)|UV+)|WX+)|YZ)", 0, 2, null)]
        [InlineData(@"(YZ+|(WX+|(UV+|(ST+|(QR+|(OP+|(MN+|(KL+|(IJ+|(GH+|(EF+|(CD+|(AB+|(89+|(67+|(45+|(23+|(01+|(yz+|(wx+|(uv+|(st+|(qr+|(op+|(mn+|(kl+|(ij+|(gh+|(ef+|(de+|(a|bc+)))))))))))))))))))))))))))))))", 0, 1, null)]
        [InlineData(@"a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(a(ab|cd+)|ef+)|gh+)|ij+)|kl+)|mn+)|op+)|qr+)|st+)|uv+)|wx+)|yz+)|01+)|23+)|45+)|67+)|89+)|AB+)|CD+)|EF+)|GH+)|IJ+)|KL+)|MN+)|OP+)|QR+)|ST+)|UV+)|WX+)|YZ+)", 0, 3, null)]
        [InlineData(@"(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((a)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))", 0, 1, 1)]
        [InlineData(@"(?(\d)\d{3}|\d)", 0, 1, 3)]
        [InlineData(@"(?(\d{7})\d{3}|\d{2})", 0, 2, 3)]
        [InlineData(@"(?(\d{7})\d{2}|\d{3})", 0, 2, 3)]
        [InlineData(@"(?(\d)\d{3}|\d{2})", 0, 2, 3)]
        [InlineData(@"(?(\d)|\d{2})", 0, 0, 2)]
        [InlineData(@"(?(\d)\d{3})", 0, 0, 3)]
        [InlineData(@"(abc)(?(1)\d{3}|\d{2})", 0, 5, 6)]
        [InlineData(@"(abc)(?(1)\d{2}|\d{3})", 0, 5, 6)]
        [InlineData(@"(abc)(?(1)|\d{2})", 0, 3, 5)]
        [InlineData(@"(abc)(?(1)\d{3})", 0, 3, 6)]
        [InlineData(@"(abc|)", 0, 0, 3)]
        [InlineData(@"(|abc)", 0, 0, 3)]
        [InlineData(@"(?(x)abc|)", 0, 0, 3)]
        [InlineData(@"(?(x)|abc)", 0, 0, 3)]
        [InlineData(@"(?(x)|abc)^\A\G\z\Z$", 0, 0, 3)]
        [InlineData(@"(?(x)|abc)^\A\G\z$\Z", (int)RegexOptions.Multiline, 0, 3)]
        [InlineData(@"^\A\Gabc", 0, 3, null)] // leading anchor currently prevents ComputeMaxLength from being invoked, as it's not needed
        [InlineData(@"^\A\Gabc", (int)RegexOptions.Multiline, 3, null)]
        [InlineData(@"abc            def", (int)RegexOptions.IgnorePatternWhitespace, 6, 6)]
        [InlineData(@"abcdef", (int)RegexOptions.RightToLeft, 6, null)]
        public void MinMaxLengthIsCorrect(string pattern, int options, int expectedMin, int? expectedMax)
        {
            // The options arrive as an int from the InlineData rows; convert once and reuse for both parses.
            var regexOptions = (RegexOptions)options;

            // The minimum is always checked against the pattern exactly as supplied.
            RegexTree tree = RegexParser.Parse(pattern, regexOptions, CultureInfo.InvariantCulture);
            Assert.Equal(expectedMin, tree.FindOptimizations.MinRequiredLength);

            // MaxPossibleLength is currently only computed/stored if there's a trailing End{Z} anchor as the max length is otherwise unused.
            // Patterns that don't already end in "$" or "\Z" (any casing) are therefore wrapped in a
            // non-capturing group with "$" appended, and re-parsed before the maximum is checked.
            bool alreadyEndsWithAnchor =
                pattern.EndsWith('$') ||
                pattern.EndsWith(@"\Z", StringComparison.OrdinalIgnoreCase);

            if (!alreadyEndsWithAnchor)
            {
                tree = RegexParser.Parse($"(?:{pattern})$", regexOptions, CultureInfo.InvariantCulture);
            }

            Assert.Equal(expectedMax, tree.FindOptimizations.MaxPossibleLength);
        }

        [Fact]
        public void MinMaxLengthIsCorrect_HugeDepth()
        {
            const int Depth = 10_000;

            // A single 'a' wrapped in Depth nested groups, followed by '$'.
            // This is too deep for analysis on some platform default stack sizes, so two outcomes are accepted below.
            string pattern = string.Concat(new string('(', Depth), "a", new string(')', Depth), "$");
            RegexTree tree = RegexParser.Parse(pattern, RegexOptions.None, CultureInfo.InvariantCulture);

            int minRequiredLength = tree.FindOptimizations.MinRequiredLength;
            bool minIsAcceptable =
                minRequiredLength == 1 /* successfully analyzed */ ||
                minRequiredLength == 0 /* ran out of stack space to complete analysis */;
            Assert.True(minIsAcceptable, $"Expected 1 or 0, got {minRequiredLength}");

            int? maxPossibleLength = tree.FindOptimizations.MaxPossibleLength;
            bool maxIsAcceptable =
                maxPossibleLength == 1 /* successfully analyzed */ ||
                !maxPossibleLength.HasValue /* ran out of stack space to complete analysis */;
            Assert.True(maxIsAcceptable, $"Expected 1 or null, got {maxPossibleLength}");
        }

        [Theory]
        [InlineData("(?i)abc", (int)RegexOptions.IgnoreCase)]
        [InlineData("(?i)abc(?-i)", (int)RegexOptions.IgnoreCase)]
        [InlineData("(?:hello(nested(?:abc|(?:(?i:b)))))", (int)RegexOptions.IgnoreCase)]
        [InlineData("(?-i)abc", (int)RegexOptions.None)]
        [InlineData("(?mi)abc", (int)(RegexOptions.IgnoreCase | RegexOptions.Multiline))]
        [InlineData("(?im)abc", (int)(RegexOptions.IgnoreCase | RegexOptions.Multiline))]
        [InlineData("(?i)ab(?m)c", (int)(RegexOptions.IgnoreCase | RegexOptions.Multiline))]
        [InlineData("(?xmi)abc", (int)(RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline))]
        [InlineData("(?s)abc", (int)RegexOptions.Singleline)]
        [InlineData("(?-simx)abc", (int)RegexOptions.None)]
        public void FoundOptionsInPatternIsCorrect(string pattern, int expectedOptions)
        {
            // The expected flags are passed as an int by the InlineData rows; convert them back for comparison.
            var expected = (RegexOptions)expectedOptions;

            // Starting from RegexOptions.None, ask the parser which options it finds in the pattern text.
            RegexOptions actual = RegexParser.ParseOptionsInPattern(pattern, RegexOptions.None);

            Assert.Equal(expected, actual);
        }
    }
}
