// Copyright 2017 Elias Kosunen
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file is a part of scnlib:
//     https://github.com/eliaskosunen/scnlib

#pragma once

#include <scn/scan.h>

#if !SCN_DISABLE_REGEX

#if defined(SCN_MODULE) && defined(SCN_IMPORT_STD)
import std;
#else
#include <vector>
#endif

namespace scn {
SCN_BEGIN_NAMESPACE

/**
 * \defgroup regex Regular expressions
 *
 * In header `<scn/regex.h>`
 *
 * scnlib doesn't do the regex processing itself, but delegates that task
 * to an external regex engine. This behavior is controlled by the
 * CMake option `SCN_REGEX_BACKEND`, which defaults to `std` (use
 * `std::regex`). Other possible options are `Boost` and `re2`.
 *
 * The exact feature set, syntax, and semantics of the regular expressions
 * may differ between the different backends.
 * See the documentation for each to learn more about the supported syntax.
 * In general:
 *  - `std` is available without external dependencies, but doesn't support
 *     named captures, has limited support for flags, and is slow.
 *  - `Boost` has the largest feature set, but is slow.
 *  - `re2` is fast, but doesn't support all regex features.
 *
 * <table>
 * <caption id="regex-cmp-table">
 * Regex backend feature comparison
 * </caption>
 *
 * <tr>
 * <th>Feature</th>
 * <th>`std`</th>
 * <th>`Boost`</th>
 * <th>`re2`</th>
 * </tr>
 *
 * <tr>
 * <td>Named captures</td>
 * <td>No</td>
 * <td>Yes</td>
 * <td>Yes</td>
 * </tr>
 *
 * <tr>
 * <td>Wide strings (`wchar_t`) as input</td>
 * <td>Yes</td>
 * <td>Yes</td>
 * <td>No</td>
 * </tr>
 *
 * <tr>
 * <td>Non-contiguous sources as input</td>
 * <td>Yes</td>
 * <td>Yes</td>
 * <td>No</td>
 * </tr>
 *
 * <tr>
 * <td>Unicode character classes (e.g. `\pL`)</td>
 * <td>No</td>
 * <td>Yes-ish <sup>[1]</sup></td>
 * <td>Yes</td>
 * </tr>
 *
 * <tr>
 * <td>Character classes (like this: `[[:alpha:]]`) match non-ASCII</td>
 * <td>No</td>
 * <td>Depends <sup>[2]</sup></td>
 * <td>No</td>
 * </tr>
 * </table>
 *
 * <sup>[1][2]</sup>: The behavior of Boost.Regex depends on whether it is
 * using ICU. If it is, character classes like `\pL` and `[[:alpha:]]` can
 * also match non-ASCII characters. Otherwise, only ASCII characters are
 * matched.
 *
 * To do regex matching, the scanned type must either be a string
 * (`std::basic_string` or `std::basic_string_view`), or
 * `scn::basic_regex_matches`.
 *
 * <table>
 * <caption id="regex-flags-table">
 * Possible flags for regex scanning
 * </caption>
 *
 * <tr>
 * <th>Flag</th>
 * <th>Description</th>
 * <th>Support</th>
 * </tr>
 *
 * <tr>
 * <td>`/m`</td>
 * <td>
 * `multiline`:
 * `^` matches the beginning of a line, and `$` the end of a line.
 * </td>
 * <td>
 * Supported by `Boost` and `re2`.
 * For `std`, uses `std::regex_constants::multiline`,
 * which was introduced in C++17, but isn't implemented by MSVC.
 * </td>
 * </tr>
 *
 * <tr>
 * <td>`/s`</td>
 * <td>
 * `singleline`:
 * `.` matches a newline.
 * </td>
 * <td>
 * Supported by `Boost` and `re2`, not by `std`.
 * </td>
 * </tr>
 *
 * <tr>
 * <td>`/i`</td>
 * <td>
 * `icase`:
 * Matches are case-insensitive.
 * </td>
 * <td>
 * Supported by everyone: `std`, `Boost`, and `re2`.
 * </td>
 * </tr>
 *
 * <tr>
 * <td>`/n`</td>
 * <td>
 * `nosubs`:
 * Subexpressions aren't matched and stored separately.
 * </td>
 * <td>
 * Supported by everyone: `std`, `Boost`, and `re2`.
 * </td>
 * </tr>
 * </table>
 */

/**
 * The result of matching a single regex (sub)expression.
 *
 * Holds a view of the matched characters and, if the regex backend supports
 * named captures, optionally a view of the name of the capture.
 * Only views are stored: the viewed character data is not owned by
 * this object.
 *
 * \ingroup regex
 */
template <typename CharT>
class basic_regex_match {
public:
    using char_type = CharT;

    /// Construct an unnamed match viewing `str`.
    basic_regex_match(std::basic_string_view<CharT> str) : m_str{str} {}

#if SCN_REGEX_SUPPORTS_NAMED_CAPTURES
    /// Construct a match viewing `str`, captured by the group called `name`.
    basic_regex_match(std::basic_string_view<CharT> str,
                      std::basic_string_view<CharT> name)
        : m_str{str}, m_name{name}
    {
    }
#endif

    /// Matched string
    std::basic_string_view<CharT> get() const
    {
        return m_str;
    }

    /// Same as `get()`: a copy of the matched string view.
    auto operator*() const
    {
        return get();
    }

    /// Pointer to the stored string view, for `match->size()` style access.
    auto operator->() const
    {
        return &m_str;
    }

#if SCN_REGEX_SUPPORTS_NAMED_CAPTURES
    /// The name of this capture, if any.
    std::optional<std::basic_string_view<CharT>> name() const
    {
        return m_name;
    }
#endif

private:
    // View of the matched characters
    std::basic_string_view<CharT> m_str;

#if SCN_REGEX_SUPPORTS_NAMED_CAPTURES
    // Name of the capture; disengaged when constructed without a name
    std::optional<std::basic_string_view<CharT>> m_name;
#endif
};

/**
 * Can be used to get all subexpression captures of a regex match.
 * Interface similar to a
 * `const std::vector<std::optional<basic_regex_match<CharT>>>`.
 * Stores `[0]` as a string, and the other matches as views into that string.
 *
 * \code{.cpp}
 * auto result =
 *     scn::scan<scn::regex_matches>("abc123", "{:/[(a-z]+)([0-9]+)/}");
 * // result->value() has three elements:
 * //  [0]: "abc123" (entire match)
 * //  [1]: "abc" (first subexpression match)
 * //  [2]: "123" (second subexpression match)
 * \endcode
 *
 * \ingroup regex
 */
template <typename CharT>
class basic_regex_matches {
public:
    struct submatch {
        std::size_t pos{};
        std::size_t len{};
#if SCN_REGEX_SUPPORTS_NAMED_CAPTURES
        std::basic_string<CharT> name{};
#endif

        explicit operator bool() const
        {
            return len != 0;
        }
    };

    using match_type = basic_regex_match<CharT>;

    using value_type = std::optional<match_type>;
    using size_type = std::size_t;

    basic_regex_matches() = default;

    basic_regex_matches(std::basic_string<CharT> match,
                        std::vector<submatch> submatches)
        : m_match(SCN_MOVE(match)), m_submatches(SCN_MOVE(submatches))
    {
    }

    SCN_NODISCARD std::optional<match_type> operator[](std::size_t i) const
    {
        if (i == 0) {
            return match_type{m_match};
        }
        SCN_EXPECT(i <= m_submatches.size());
        const auto& sm = m_submatches[i - 1];
        if (!sm) {
            return std::nullopt;
        }
#if SCN_REGEX_SUPPORTS_NAMED_CAPTURES
        if (sm.name.empty()) {
            return match_type{detail::make_string_view_from_pointers(
                m_match.data() + sm.pos, m_match.data() + sm.pos + sm.len)};
        }
        return match_type{
            detail::make_string_view_from_pointers(
                m_match.data() + sm.pos, m_match.data() + sm.pos + sm.len),
            sm.name};
#else
        return match_type{detail::make_string_view_from_pointers(
            m_match.data() + sm.pos, m_match.data() + sm.pos + sm.len)};
#endif
    }

    SCN_NODISCARD std::optional<match_type> at(std::size_t i) const
    {
        if (i == 0) {
            return match_type{m_match};
        }
        const auto& sm = m_submatches.at(i - 1);
        if (!sm) {
            return std::nullopt;
        }
#if SCN_REGEX_SUPPORTS_NAMED_CAPTURES
        if (sm.name.empty()) {
            return match_type{
                std::basic_string_view<CharT>{m_match}.substr(sm.pos, sm.len)};
        }
        return match_type{
            std::basic_string_view<CharT>{m_match}.substr(sm.pos, sm.len),
            sm.name};
#else
        return match_type{
            std::basic_string_view<CharT>{m_match}.substr(sm.pos, sm.len)};
#endif
    }

    SCN_NODISCARD std::size_t size() const
    {
        return m_submatches.size() + 1u;
    }

    struct iterator {
        using value_type = std::optional<match_type>;
        using difference_type = std::ptrdiff_t;
        using pointer = value_type*;
        using reference = value_type;
        using iterator_category = std::random_access_iterator_tag;

        iterator() = default;

        iterator(const basic_regex_matches& parent, std::size_t index)
            : m_parent(&parent), m_index(index)
        {
        }

        iterator& operator++()
        {
            SCN_EXPECT(m_parent);
            SCN_EXPECT(m_index < m_parent->size());
            ++m_index;
            return *this;
        }

        iterator operator++(int)
        {
            auto tmp = *this;
            ++*this;
            return tmp;
        }

        iterator& operator--()
        {
            SCN_EXPECT(m_parent);
            SCN_EXPECT(m_index > 0);
            --m_index;
            return *this;
        }

        iterator operator--(int)
        {
            auto tmp = *this;
            --*this;
            return tmp;
        }

        value_type operator*() const
        {
            SCN_EXPECT(m_parent);
            return (*m_parent)[m_index];
        }

        bool operator==(const iterator& other) const
        {
            return m_parent == other.m_parent && m_index == other.m_index;
        }

        bool operator!=(const iterator& other) const
        {
            return !(*this == other);
        }

        iterator& operator+=(difference_type n)
        {
            SCN_EXPECT(m_parent);
            SCN_EXPECT(static_cast<difference_type>(m_index) + n <=
                       static_cast<difference_type>(m_parent->size()));
            SCN_EXPECT(static_cast<difference_type>(m_index) + n >= 0);
            m_index += n;
            return *this;
        }

        friend iterator operator+(const iterator& it, difference_type n)
        {
            auto copy = it;
            copy += n;
            return copy;
        }

        friend iterator operator+(difference_type n, const iterator& it)
        {
            return it + n;
        }

        iterator& operator-=(difference_type n)
        {
            SCN_EXPECT(m_parent);
            SCN_EXPECT(static_cast<difference_type>(m_index) - n <=
                       static_cast<difference_type>(m_parent->size()));
            SCN_EXPECT(static_cast<difference_type>(m_index) - n >= 0);
            m_index -= n;
            return *this;
        }

        iterator operator-(difference_type n) const
        {
            auto copy = *this;
            copy -= n;
            return copy;
        }

        value_type operator[](difference_type n)
        {
            return *(*this + n);
        }

    private:
        const basic_regex_matches* m_parent{nullptr};
        std::size_t m_index{0};
    };

    SCN_NODISCARD iterator begin() const
    {
        return iterator{*this, 0};
    }

    SCN_NODISCARD iterator end() const
    {
        return iterator{*this, size()};
    }

private:
    std::basic_string<CharT> m_match{};
    std::vector<submatch> m_submatches{};
};

// Compile-time check: `basic_regex_matches<char>` must model
// `ranges::bidirectional_range`.
static_assert(ranges::bidirectional_range<basic_regex_matches<char>>);

/**
 * Scanner for `basic_regex_matches`.
 *
 * Only customizes the parsing of the format string specs,
 * everything else is inherited from `detail::builtin_scanner`.
 */
template <typename CharT>
struct scanner<basic_regex_matches<CharT>, CharT>
    : detail::builtin_scanner<basic_regex_matches<CharT>, CharT> {
    /**
     * Parse the format specs from `pctx` into `m_specs`.
     *
     * If the regex backend can't read from non-contiguous sources,
     * an error is reported through the specs checker if a regex is to be
     * read from such a source.
     *
     * \return The iterator returned by `detail::parse_format_specs`.
     */
    template <typename ParseCtx>
    constexpr auto parse(ParseCtx& pctx) -> typename ParseCtx::iterator
    {
        using setter_type = detail::specs_setter;

        auto specs_first = pctx.begin();
        const auto specs_last = pctx.end();

        auto specs_checker = detail::specs_checker<setter_type>(
            setter_type(this->m_specs), detail::arg_type::none_type);
        const auto parsed_until = detail::parse_format_specs(
            detail::to_address(specs_first), detail::to_address(specs_last),
            specs_checker);

        // NOTE(review): presumably reports specs that aren't valid for a
        // regex through the checker -- confirm against its definition
        detail::check_regex_type_specs(this->m_specs, specs_checker);

#if !SCN_REGEX_SUPPORTS_NON_CONTIGUOUS_SOURCES
        const bool is_regex_type =
            this->m_specs.type == detail::presentation_type::regex ||
            this->m_specs.type == detail::presentation_type::regex_escaped;
        // The source is only queried for contiguity if a regex was requested
        if (is_regex_type && !pctx.is_source_contiguous()) {
            SCN_UNLIKELY_ATTR
            // clang-format off
            specs_checker.on_error("Cannot read a regex from a non-contiguous source");
            // clang-format on
        }
#endif

        return parsed_until;
    }
};

SCN_END_NAMESPACE
}  // namespace scn

#endif
