From 365bf966cab22d669e3f978d2e7bff01a2adebe6 Mon Sep 17 00:00:00 2001 From: Unix1 Date: Fri, 28 Mar 2014 15:06:13 -0700 Subject: [PATCH 1/2] first run at rewriting parser, passes all current tests --- src/econfig_server.erl | 91 ++++++++++++------------------------ test/test_econfig_server.erl | 1 + 2 files changed, 31 insertions(+), 61 deletions(-) diff --git a/src/econfig_server.erl b/src/econfig_server.erl index 183fe42..04382ad 100644 --- a/src/econfig_server.erl +++ b/src/econfig_server.erl @@ -521,7 +521,6 @@ parse_inis(ConfName, IniFiles0) -> WriteFile = lists:last(IniFiles), WriteFile. - parse_ini_file(ConfName, IniFile) -> IniFilename = econfig_util:abs_pathname(IniFile), IniBin = @@ -535,65 +534,35 @@ parse_ini_file(ConfName, IniFile) -> Msg = list_to_binary(io_lib:format(Fmt, [IniFilename])), throw({startup_error, Msg}) end, + {match, Sections} = parse_ini_bin_to_sections(IniBin), + {ParsedIniValues, DeleteIniKeys} = parse_ini_sections_to_conf(ConfName, Sections), + {ok, ParsedIniValues, DeleteIniKeys}. - Lines = re:split(IniBin, "\r\n|\n|\r|\032", [{return, list}]), - {_, ParsedIniValues, DeleteIniKeys} = - lists:foldl(fun(Line, {AccSectionName, AccValues, AccDeletes}) -> - case string:strip(Line) of - "[" ++ Rest -> - case re:split(Rest, "\\]", [{return, list}]) of - [NewSectionName, ""] -> - {NewSectionName, AccValues, AccDeletes}; - _Else -> % end bracket not at end, ignore this line - {AccSectionName, AccValues, AccDeletes} - end; - ";" ++ _Comment -> - {AccSectionName, AccValues, AccDeletes}; - Line2 -> - case re:split(Line2, "\s*=\s*", [{return, list}]) of - [Value] -> - MultiLineValuePart = case re:run(Line, "^ \\S", []) of - {match, _} -> - true; - _ -> - false - end, - case {MultiLineValuePart, AccValues} of - {true, [{{_, ValueName}, PrevValue} | AccValuesRest]} -> - % remove comment - case re:split(Value, "\s*;|\t;", [{return, list}]) of - [[]] -> - % empty line - {AccSectionName, AccValues, AccDeletes}; - [LineValue | _Rest] -> - E = {{AccSectionName, ValueName}, - PrevValue ++ " " ++ - econfig_util:trim_whitespace(LineValue)}, - {AccSectionName, [E | AccValuesRest], AccDeletes} - end; - _ -> - {AccSectionName, AccValues, AccDeletes} - end; - [""|_LineValues] -> % line begins with "=", ignore - {AccSectionName, AccValues, AccDeletes}; - [ValueName|LineValues] -> % yeehaw, got a line! - %% replace all tabs by an empty value. - ValueName1 = econfig_util:trim_whitespace(ValueName), - RemainingLine = econfig_util:implode(LineValues, "="), - % removes comments - case re:split(RemainingLine, "\s*;|\t;", [{return, list}]) of - [[]] -> - % empty line means delete this key - AccDeletes1 = [{ConfName, AccSectionName, ValueName1} - | AccDeletes], - {AccSectionName, AccValues, AccDeletes1}; - [LineValue | _Rest] -> - {AccSectionName, - [{{ConfName, AccSectionName, ValueName1}, - econfig_util:trim_whitespace(LineValue)} - | AccValues], AccDeletes} - end - end +parse_ini_bin_to_sections(IniBin) -> + re:run(IniBin, <<"(?:\\[([^\\]]+)\\](.*?(?=(?:\\n\\[|$))))+">>, + [global, dotall, {capture, all_but_first, binary}]). + +parse_ini_sections_to_conf(ConfName, Sections) -> + lists:foldl( + fun([Name|Content], Acc) -> + case re:run(Content, <<"(?:^|\\n)[\\n]*((?:(?!;).)*?)=(.*?)(?:;.*?)?(?=\\n|$)">>, + [global, dotall, {capture, all_but_first, list}]) of + {match, KVList} -> + lists:foldl( + fun([K, V], {I, D}) -> + K1 = econfig_util:trim_whitespace(K), + V1 = econfig_util:trim_whitespace(V), + case V1 of + [] -> + {I, [{ConfName, binary_to_list(Name), K1} | D]}; + _ -> + {[{{ConfName, binary_to_list(Name), K1}, V1} | I], D} + end + end, + Acc, KVList); + _ -> + Acc end - end, {"", [], []}, Lines), - {ok, ParsedIniValues, DeleteIniKeys}. + end, + {[], []}, + Sections). diff --git a/test/test_econfig_server.erl b/test/test_econfig_server.erl index 1a3d6fd..9be5d0b 100644 --- a/test/test_econfig_server.erl +++ b/test/test_econfig_server.erl @@ -36,6 +36,7 @@ parse_test_() -> ?_assertEqual("value 9", econfig:get_value(t, "section 2", "key9")), ?_assertEqual("value10", econfig:get_value(t, "section 2", "key10")), ?_assertEqual("new-val-11", econfig:get_value(t, "section3", "key11")), + ?_assertEqual(undefined, econfig:get_value(t, "section3", "key12")), ?_assertEqual("this is a value for key 13", econfig:get_value(t, "section3", "key13")), ?_assertEqual("some-collection.of+random@characters", econfig:get_value(t, "section3", "key14")), ?_assertEqual(undefined, econfig:get_value(t, "section3", "key15")) From f36039551cdd8b32464657d97451d9e6a1f6326d Mon Sep 17 00:00:00 2001 From: Unix1 Date: Sat, 29 Mar 2014 09:41:43 -0700 Subject: [PATCH 2/2] replace dotall with multiline to simplify regex --- src/econfig_server.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/econfig_server.erl b/src/econfig_server.erl index 04382ad..d0942cb 100644 --- a/src/econfig_server.erl +++ b/src/econfig_server.erl @@ -545,8 +545,9 @@ parse_ini_bin_to_sections(IniBin) -> parse_ini_sections_to_conf(ConfName, Sections) -> lists:foldl( fun([Name|Content], Acc) -> - case re:run(Content, <<"(?:^|\\n)[\\n]*((?:(?!;).)*?)=(.*?)(?:;.*?)?(?=\\n|$)">>, - [global, dotall, {capture, all_but_first, list}]) of + case re:run(Content, <<"^((?:(?!;).)*?)=(.*?)(?:;.*?)?$">>, + [global, multiline, {newline, any}, + {capture, all_but_first, list}]) of {match, KVList} -> lists:foldl( fun([K, V], {I, D}) ->