From 439d43932133d32dcabd482f11842072d52b41e1 Mon Sep 17 00:00:00 2001 From: Zher Huei Lee Date: Sun, 23 Oct 2016 01:22:48 +0100 Subject: RegEx re-implemented as a module Re-wrote nrex as a module using godot-specific parts and new features: * Added string substitutions. * Named groups are now supported. * Removed use of mutable variables in RegEx. RegExMatch is returned instead. --- modules/regex/regex.h | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 modules/regex/regex.h (limited to 'modules/regex/regex.h') diff --git a/modules/regex/regex.h b/modules/regex/regex.h new file mode 100644 index 000000000..283368c34 --- /dev/null +++ b/modules/regex/regex.h @@ -0,0 +1,114 @@ +/*************************************************************************/ +/* regex.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* http://www.godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef REGEX_H +#define REGEX_H + +#include "core/vector.h" +#include "core/ustring.h" +#include "core/dictionary.h" +#include "core/reference.h" +#include "core/resource.h" + +class RegExNode; + +class RegExMatch : public Reference { + + OBJ_TYPE(RegExMatch, Reference); + + struct Group { + Variant name; + int start; + int length; + }; + + Vector captures; + String string; + + friend class RegEx; + friend class RegExSearch; + friend class RegExNodeCapturing; + friend class RegExNodeBackReference; + +protected: + + static void _bind_methods(); + +public: + + String expand(const String& p_template) const; + + int get_group_count() const; + Array get_group_array() const; + + Array get_names() const; + Dictionary get_name_dict() const; + + String get_string(const Variant& p_name) const; + int get_start(const Variant& p_name) const; + int get_end(const Variant& p_name) const; + + RegExMatch(); + +}; + +class RegEx : public Reference { + + OBJ_TYPE(RegEx, Reference); + + RegExNode* root; + Vector group_names; + String pattern; + int lookahead_depth; + +protected: + + static void _bind_methods(); + +public: + + void clear(); + Error compile(const String& p_pattern); + + Ref search(const String& p_text, int p_start = 0, int p_end = -1) const; + String sub(const String& p_text, const String& p_template, int p_start = 0, int p_end = -1) const; + + bool is_valid() const; + String 
get_pattern() const; + int get_group_count() const; + Array get_names() const; + + RegEx(); + RegEx(const String& p_pattern); + ~RegEx(); + +}; + +#endif // REGEX_H + -- cgit v1.2.3-70-g09d2 From c3b4686082bc92c70886ee848064009c8f628193 Mon Sep 17 00:00:00 2001 From: Zher Huei Lee Date: Mon, 24 Oct 2016 22:13:26 +0100 Subject: Added global sub and bounds checking to RegEx --- doc/base/classes.xml | 14 ++++++------ modules/regex/regex.cpp | 57 ++++++++++++++++++++++++++++++++++++++----------- modules/regex/regex.h | 2 +- 3 files changed, 54 insertions(+), 19 deletions(-) (limited to 'modules/regex/regex.h') diff --git a/doc/base/classes.xml b/doc/base/classes.xml index cafb14491..47fa1deb5 100644 --- a/doc/base/classes.xml +++ b/doc/base/classes.xml @@ -32582,7 +32582,7 @@ - Searches the text for the compiled pattern. Returns a [RegExMatch] container of the first matching reult if found, otherwise null. The starting point of the serch could be specified without moving the string start anchor. + Searches the text for the compiled pattern. Returns a [RegExMatch] container of the first matching result if found, otherwise null. The region to search within can be specified without modifying where the start and end anchor would be. @@ -32590,14 +32590,16 @@ - + - + - + + + - Searches the specified text for the compiled pattern and returns the text with the result replaced. Escapes and backreferences such as [code]\1[/code] and [code]\g<name>[/code] are automatically expanded and resolved. If no change was found the unmodified text is returned instead. + Searches the text for the compiled pattern and replaces it with the specified string. Escapes and backreferences such as [code]\1[/code] and [code]\g<name>[/code] are expanded and resolved. By default only the first instance is replaced but it can be changed for all instances (global replacement). The region to search within can be specified without modifying where the start and end anchor would be. 
@@ -32616,7 +32618,7 @@ - Using results from the search, returns the specified string with escapes and backreferences such as [code]\1[/code] and [code]\g<name>[/code] expanded and resolved + Using results from the search, returns the specified string with escapes and backreferences such as [code]\1[/code] and [code]\g<name>[/code] expanded and resolved. diff --git a/modules/regex/regex.cpp b/modules/regex/regex.cpp index 8f26d764c..388e6dfde 100644 --- a/modules/regex/regex.cpp +++ b/modules/regex/regex.cpp @@ -1340,6 +1340,12 @@ Error RegEx::compile(const String& p_pattern) { Ref RegEx::search(const String& p_text, int p_start, int p_end) const { + ERR_FAIL_COND_V(!is_valid(), NULL); + ERR_FAIL_COND_V(p_start < 0, NULL); + ERR_FAIL_COND_V(p_start >= p_text.length(), NULL); + ERR_FAIL_COND_V(p_end > p_text.length(), NULL); + ERR_FAIL_COND_V(p_end != -1 && p_end < p_start, NULL); + Ref res = memnew(RegExMatch()); for (int i = 0; i < group_names.size(); ++i) { @@ -1350,7 +1356,7 @@ Ref RegEx::search(const String& p_text, int p_start, int p_end) cons res->string = p_text; - if (p_end < p_start || p_end > p_text.length()) + if (p_end == -1) p_end = p_text.length(); RegExSearch s(res, p_end, lookahead_depth); @@ -1369,18 +1375,45 @@ Ref RegEx::search(const String& p_text, int p_start, int p_end) cons return NULL; } -String RegEx::sub(const String& p_text, const String& p_template, int p_start, int p_end) const { +String RegEx::sub(const String& p_text, const String& p_replacement, bool p_all, int p_start, int p_end) const { - Ref m = search(p_text, p_start, p_end); - RegExMatch::Group& s = m->captures[0]; - if (s.start >= 0) { - String res = p_text.substr(0, s.start) + m->expand(p_template); - int end = s.start + s.length; - if (end < p_text.length()) - res += p_text.substr(end, p_text.length() - end); - return res; + ERR_FAIL_COND_V(!is_valid(), p_text); + ERR_FAIL_COND_V(p_start < 0, p_text); + ERR_FAIL_COND_V(p_start >= p_text.length(), p_text); + 
ERR_FAIL_COND_V(p_end > p_text.length(), p_text); + ERR_FAIL_COND_V(p_end != -1 && p_end < p_start, p_text); + + String text = p_text; + int start = p_start; + + if (p_end == -1) + p_end = p_text.length(); + + while (start < text.length() && (p_all || start == p_start)) { + + Ref m = search(text, start, p_end); + + RegExMatch::Group& s = m->captures[0]; + + if (s.start < 0) + break; + + String res = text.substr(0, s.start) + m->expand(p_replacement); + + start = res.length(); + + if (s.length == 0) + ++start; + + int sub_end = s.start + s.length; + if (sub_end < text.length()) + res += text.substr(sub_end, text.length() - sub_end); + + p_end += res.length() - text.length(); + + text = res; } - return p_text; + return text; } void RegEx::clear() { @@ -1456,7 +1489,7 @@ void RegEx::_bind_methods() { ObjectTypeDB::bind_method(_MD("clear"),&RegEx::clear); ObjectTypeDB::bind_method(_MD("compile","pattern"),&RegEx::compile); ObjectTypeDB::bind_method(_MD("search","text","start","end"),&RegEx::search, DEFVAL(0), DEFVAL(-1)); - ObjectTypeDB::bind_method(_MD("sub","text","template","start","end"),&RegEx::sub, DEFVAL(0), DEFVAL(-1)); + ObjectTypeDB::bind_method(_MD("sub","text","replacement","all","start","end"),&RegEx::sub, DEFVAL(false), DEFVAL(0), DEFVAL(-1)); ObjectTypeDB::bind_method(_MD("is_valid"),&RegEx::is_valid); ObjectTypeDB::bind_method(_MD("get_pattern"),&RegEx::get_pattern); ObjectTypeDB::bind_method(_MD("get_group_count"),&RegEx::get_group_count); diff --git a/modules/regex/regex.h b/modules/regex/regex.h index 283368c34..8d31b8477 100644 --- a/modules/regex/regex.h +++ b/modules/regex/regex.h @@ -97,7 +97,7 @@ public: Error compile(const String& p_pattern); Ref search(const String& p_text, int p_start = 0, int p_end = -1) const; - String sub(const String& p_text, const String& p_template, int p_start = 0, int p_end = -1) const; + String sub(const String& p_text, const String& p_replacement, bool p_all = false, int p_start = 0, int p_end = -1) const; bool 
is_valid() const; String get_pattern() const; -- cgit v1.2.3-70-g09d2 From 9a5ce099f1c3559cc46b923d4e192a7be781163c Mon Sep 17 00:00:00 2001 From: Zher Huei Lee Date: Wed, 26 Oct 2016 13:05:00 +0100 Subject: Changed RegEx to inherit Resource --- modules/regex/regex.cpp | 8 ++++++-- modules/regex/regex.h | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'modules/regex/regex.h') diff --git a/modules/regex/regex.cpp b/modules/regex/regex.cpp index 388e6dfde..a0f4b4934 100644 --- a/modules/regex/regex.cpp +++ b/modules/regex/regex.cpp @@ -985,7 +985,9 @@ static bool RegEx_is_shorthand(CharType ch) { Error RegEx::compile(const String& p_pattern) { - if (pattern == p_pattern) + ERR_FAIL_COND_V(p_pattern.length() == 0, FAILED); + + if (pattern == p_pattern && root) return OK; clear(); @@ -1421,7 +1423,7 @@ void RegEx::clear() { if (root) memdelete(root); - pattern.clear(); + root = NULL; group_names.clear(); lookahead_depth = 0; } @@ -1494,5 +1496,7 @@ void RegEx::_bind_methods() { ObjectTypeDB::bind_method(_MD("get_pattern"),&RegEx::get_pattern); ObjectTypeDB::bind_method(_MD("get_group_count"),&RegEx::get_group_count); ObjectTypeDB::bind_method(_MD("get_names"),&RegEx::get_names); + + ADD_PROPERTY(PropertyInfo(Variant::STRING, "pattern"), _SCS("compile"), _SCS("get_pattern")); } diff --git a/modules/regex/regex.h b/modules/regex/regex.h index 8d31b8477..803aa72b3 100644 --- a/modules/regex/regex.h +++ b/modules/regex/regex.h @@ -78,9 +78,9 @@ public: }; -class RegEx : public Reference { +class RegEx : public Resource { - OBJ_TYPE(RegEx, Reference); + OBJ_TYPE(RegEx, Resource); RegExNode* root; Vector group_names; -- cgit v1.2.3-70-g09d2