1
-
2
1
import os
3
2
import sys
4
3
from argparse import ArgumentParser , RawTextHelpFormatter , Namespace
11
10
12
11
def boolean_string(s):
    """Convert the text ``"true"`` or ``"false"`` (any letter case) to a bool.

    Used as the ``type=`` converter for the boolean command-line options
    (for example ``--save-files False``), where argparse reports a raised
    ``ValueError`` as an invalid argument value.

    Parameters
    ----------
    s : str
        The raw option value.

    Returns
    -------
    bool
        ``True`` for "true", ``False`` for "false".

    Raises
    ------
    ValueError
        If ``s`` is not a string, or is anything other than "true"/"false".
    """
    try:
        lowered = s.lower()
    except AttributeError:
        # Non-string input (e.g. None) has no .lower(); report it the same way
        # as any other invalid value instead of leaking the AttributeError.
        raise ValueError("Not a valid boolean string") from None
    if lowered not in {"false", "true"}:
        raise ValueError("Not a valid boolean string")
    return lowered == "true"
19
18
20
19
21
20
def main (args = None ):
@@ -31,63 +30,105 @@ def main(args=None):
31
30
as well as the reconstruction error as a fxn of component
32
31
"""
33
32
34
- parser = ArgumentParser (prog = 'nmf_mapping' ,
35
- description = _BANNER , formatter_class = RawTextHelpFormatter )
33
+ parser = ArgumentParser (prog = "nmf_mapping" , description = _BANNER , formatter_class = RawTextHelpFormatter )
36
34
37
35
def tup(s):
    """Parse a ``"low,high"`` string into a pair of integers.

    Used as the ``type=`` converter for ``--xrange``, so each range given on
    the command line (e.g. ``5,10``) becomes ``(5, 10)``.

    Parameters
    ----------
    s : str
        Two integers separated by a single comma.

    Returns
    -------
    tuple of (int, int)
        The ``(low, high)`` bounds.

    Raises
    ------
    TypeError
        If ``s`` is not a string of exactly two comma-separated integers.
    """
    try:
        low, high = map(int, s.split(","))
    except (AttributeError, TypeError, ValueError) as err:
        # Catch only what parsing can raise: a non-string input, a
        # non-integer field, or the wrong number of fields. A bare except
        # would also swallow KeyboardInterrupt/SystemExit.
        raise TypeError("r range must be low, high") from err
    return low, high
43
41
44
42
# args
45
- parser .add_argument ("directory" , default = None , type = str ,
46
- help = "a directory of PDFs to calculate NMF decomposition" )
43
+ parser .add_argument (
44
+ "directory" , default = None , type = str , help = "a directory of PDFs to calculate NMF decomposition"
45
+ )
47
46
group = parser .add_mutually_exclusive_group ()
48
- parser .add_argument ("--save-files" , default = True , type = boolean_string ,
49
- help = 'whether to save the component, graph, and json files in the execution directory\n '
50
- 'default: True\n '
51
- 'e.g. --save-files False' )
52
- group .add_argument ("--threshold" , default = None , type = int ,
53
- help = "a threshold for the number of structural phases graphed (NMF components returned)\n "
54
- "e.g. --threshold 3" )
55
- group .add_argument ("--improve-thresh" , default = None , type = float ,
56
- help = "a threshold (between 0 and 1) for the relative improvement ratio necessary to add an"
57
- " additional component. Default is 0.001. 0.1 Recommended for real data.\n "
58
- "e.g. --improve-thresh 0.1" )
59
- group .add_argument ("--pca-thresh" , default = None , type = float ,
60
- help = "a threshold (between 0 and 1) for the explained variance of PCA to determine the \n "
61
- "number of components for NMF. e.g. --pca-thresh 0.95" )
62
- parser .add_argument ("--n-iter" , default = None , type = int ,
63
- help = "total number of iterations to run NMF algo. Defaults to 1000. 10000 typical to publish." )
64
- parser .add_argument ("--xrd" , default = False , type = boolean_string ,
65
- help = "whether to look for .xy files rather than .gr files\n "
66
- "default: False\n "
67
- "e.g. --xrd True" )
68
- parser .add_argument ("--x_units" , default = None , type = str , choices = ["twotheta" , "q" ], required = '--xrd' in sys .argv ,
69
- help = "x axis units for XRD data\n "
70
- "default: None\n "
71
- "e.g. --x_units twotheta" )
72
- parser .add_argument ("--xrange" , default = None , type = tup , nargs = '*' ,
73
- help = "the x-range over which to calculate NMF, can be multiple ranges (e.g. --xrange 5,10 12,15)" )
74
- parser .add_argument ("--show" , default = True , type = boolean_string ,
75
- help = 'whether to show the plot' )
47
+ parser .add_argument (
48
+ "--save-files" ,
49
+ default = True ,
50
+ type = boolean_string ,
51
+ help = "whether to save the component, graph, and json files in the execution directory\n "
52
+ "default: True\n "
53
+ "e.g. --save-files False" ,
54
+ )
55
+ group .add_argument (
56
+ "--threshold" ,
57
+ default = None ,
58
+ type = int ,
59
+ help = "a threshold for the number of structural phases graphed (NMF components returned)\n "
60
+ "e.g. --threshold 3" ,
61
+ )
62
+ group .add_argument (
63
+ "--improve-thresh" ,
64
+ default = None ,
65
+ type = float ,
66
+ help = "a threshold (between 0 and 1) for the relative improvement ratio necessary to add an"
67
+ " additional component. Default is 0.001. 0.1 Recommended for real data.\n "
68
+ "e.g. --improve-thresh 0.1" ,
69
+ )
70
+ group .add_argument (
71
+ "--pca-thresh" ,
72
+ default = None ,
73
+ type = float ,
74
+ help = "a threshold (between 0 and 1) for the explained variance of PCA to determine the \n "
75
+ "number of components for NMF. e.g. --pca-thresh 0.95" ,
76
+ )
77
+ parser .add_argument (
78
+ "--n-iter" ,
79
+ default = None ,
80
+ type = int ,
81
+ help = "total number of iterations to run NMF algo. Defaults to 1000. 10000 typical to publish." ,
82
+ )
83
+ parser .add_argument (
84
+ "--xrd" ,
85
+ default = False ,
86
+ type = boolean_string ,
87
+ help = "whether to look for .xy files rather than .gr files\n " "default: False\n " "e.g. --xrd True" ,
88
+ )
89
+ parser .add_argument (
90
+ "--x_units" ,
91
+ default = None ,
92
+ type = str ,
93
+ choices = ["twotheta" , "q" ],
94
+ required = "--xrd" in sys .argv ,
95
+ help = "x axis units for XRD data\n " "default: None\n " "e.g. --x_units twotheta" ,
96
+ )
97
+ parser .add_argument (
98
+ "--xrange" ,
99
+ default = None ,
100
+ type = tup ,
101
+ nargs = "*" ,
102
+ help = "the x-range over which to calculate NMF, can be multiple ranges (e.g. --xrange 5,10 12,15)" ,
103
+ )
104
+ parser .add_argument ("--show" , default = True , type = boolean_string , help = "whether to show the plot" )
76
105
args0 = Namespace ()
77
106
args1 , _ = parser .parse_known_args (args , namespace = args0 )
78
107
79
108
input_list , data_list = nmf .load_data (args1 .directory , args1 .xrd )
80
109
if args1 .pca_thresh :
81
- df_components , df_component_weight_timeseries , df_reconstruction_error , df_explained_var_ratio = \
82
- nmf .NMF_decomposition (input_list , args1 .xrange , args1 .threshold , additional_comp = False ,
83
- improve_thresh = args1 .improve_thresh , n_iter = args1 .n_iter ,
84
- pca_thresh = args1 .pca_thresh )
110
+ df_components , df_component_weight_timeseries , df_reconstruction_error , df_explained_var_ratio = (
111
+ nmf .NMF_decomposition (
112
+ input_list ,
113
+ args1 .xrange ,
114
+ args1 .threshold ,
115
+ additional_comp = False ,
116
+ improve_thresh = args1 .improve_thresh ,
117
+ n_iter = args1 .n_iter ,
118
+ pca_thresh = args1 .pca_thresh ,
119
+ )
120
+ )
85
121
else :
86
- df_components , df_component_weight_timeseries , df_reconstruction_error = \
87
- nmf .NMF_decomposition (input_list , args1 .xrange , args1 .threshold , additional_comp = False ,
88
- improve_thresh = args1 .improve_thresh , n_iter = args1 .n_iter )
122
+ df_components , df_component_weight_timeseries , df_reconstruction_error = nmf .NMF_decomposition (
123
+ input_list ,
124
+ args1 .xrange ,
125
+ args1 .threshold ,
126
+ additional_comp = False ,
127
+ improve_thresh = args1 .improve_thresh ,
128
+ n_iter = args1 .n_iter ,
129
+ )
89
130
90
- print (f' Number of components: { len (df_components .columns )} ' )
131
+ print (f" Number of components: { len (df_components .columns )} " )
91
132
92
133
fig1 = nmf .component_plot (df_components , args1 .xrd , args1 .x_units , args1 .show )
93
134
fig2 = nmf .component_ratio_plot (df_component_weight_timeseries , args1 .show )
@@ -96,35 +137,44 @@ def tup(s):
96
137
fig4 = nmf .explained_variance_plot (df_explained_var_ratio , args1 .show )
97
138
98
139
if args1 .save_files :
99
- if not os .path .exists (os .path .join (os .getcwd (), 'nmf_result' )):
100
- os .mkdir (os .path .join (os .getcwd (), 'nmf_result' ))
101
- output_fn = datetime .fromtimestamp (time .time ()).strftime (
102
- '%Y%m%d%H%M%S%f' )
103
- df_components .to_json (os .path .join (os .getcwd (), 'nmf_result' , 'x_index_vs_y_col_components.json' ))
104
- df_component_weight_timeseries .to_json (os .path .join (os .getcwd (), 'nmf_result' , 'component_index_vs_pratio_col.json' ))
105
- df_component_weight_timeseries .to_csv (os .path .join (os .getcwd (), 'nmf_result' , output_fn + 'component_row_pratio_col.txt' ), header = None , index = False , sep = ' ' , mode = 'a' )
106
- df_reconstruction_error .to_json (os .path .join (os .getcwd (), 'nmf_result' , 'component_index_vs_RE_value.json' ))
107
- plot_file1 = os .path .join (os .getcwd (), 'nmf_result' , output_fn + "comp_plot.png" )
108
- plot_file2 = os .path .join (os .getcwd (), 'nmf_result' , output_fn + "ratio_plot.png" )
109
- plot_file3 = os .path .join (os .getcwd (), 'nmf_result' , output_fn + "loss_plot.png" )
140
+ if not os .path .exists (os .path .join (os .getcwd (), "nmf_result" )):
141
+ os .mkdir (os .path .join (os .getcwd (), "nmf_result" ))
142
+ output_fn = datetime .fromtimestamp (time .time ()).strftime ("%Y%m%d%H%M%S%f" )
143
+ df_components .to_json (os .path .join (os .getcwd (), "nmf_result" , "x_index_vs_y_col_components.json" ))
144
+ df_component_weight_timeseries .to_json (
145
+ os .path .join (os .getcwd (), "nmf_result" , "component_index_vs_pratio_col.json" )
146
+ )
147
+ df_component_weight_timeseries .to_csv (
148
+ os .path .join (os .getcwd (), "nmf_result" , output_fn + "component_row_pratio_col.txt" ),
149
+ header = None ,
150
+ index = False ,
151
+ sep = " " ,
152
+ mode = "a" ,
153
+ )
154
+ df_reconstruction_error .to_json (
155
+ os .path .join (os .getcwd (), "nmf_result" , "component_index_vs_RE_value.json" )
156
+ )
157
+ plot_file1 = os .path .join (os .getcwd (), "nmf_result" , output_fn + "comp_plot.png" )
158
+ plot_file2 = os .path .join (os .getcwd (), "nmf_result" , output_fn + "ratio_plot.png" )
159
+ plot_file3 = os .path .join (os .getcwd (), "nmf_result" , output_fn + "loss_plot.png" )
110
160
if args1 .pca_thresh :
111
- plot_file7 = os .path .join (os .getcwd (), ' nmf_result' , output_fn + "pca_var_plot.png" )
112
- plot_file4 = os .path .splitext (plot_file1 )[0 ] + ' .pdf'
113
- plot_file5 = os .path .splitext (plot_file2 )[0 ] + ' .pdf'
114
- plot_file6 = os .path .splitext (plot_file3 )[0 ] + ' .pdf'
161
+ plot_file7 = os .path .join (os .getcwd (), " nmf_result" , output_fn + "pca_var_plot.png" )
162
+ plot_file4 = os .path .splitext (plot_file1 )[0 ] + " .pdf"
163
+ plot_file5 = os .path .splitext (plot_file2 )[0 ] + " .pdf"
164
+ plot_file6 = os .path .splitext (plot_file3 )[0 ] + " .pdf"
115
165
if args1 .pca_thresh :
116
- plot_file8 = os .path .splitext (plot_file7 )[0 ] + ' .pdf'
117
- txt_file = os .path .join (os .getcwd (), ' nmf_result' , output_fn + ' _meta' + ' .txt' )
118
- with open (txt_file , 'w+' ) as fi :
119
- fi .write (' NMF Analysis\n \n ' )
120
- fi .write (f' { len (df_component_weight_timeseries .columns )} files uploaded for analysis.\n \n ' )
121
- fi .write (f' The selected active r ranges are: { args1 .xrange } \n \n ' )
122
- fi .write (' Thesholding:\n ' )
123
- fi .write (f' \t The input component threshold was: { args1 .threshold } \n ' )
124
- fi .write (f' \t The input improvement threshold was: { args1 .improve_thresh } \n ' )
125
- fi .write (f' \t The input # of iterations to run was: { args1 .n_iter } \n ' )
126
- fi .write (f' \t Was PCA thresholding used?: { args1 .pca_thresh } \n ' )
127
- fi .write (f' { len (df_components .columns )} components were extracted' )
166
+ plot_file8 = os .path .splitext (plot_file7 )[0 ] + " .pdf"
167
+ txt_file = os .path .join (os .getcwd (), " nmf_result" , output_fn + " _meta" + " .txt" )
168
+ with open (txt_file , "w+" ) as fi :
169
+ fi .write (" NMF Analysis\n \n " )
170
+ fi .write (f" { len (df_component_weight_timeseries .columns )} files uploaded for analysis.\n \n " )
171
+ fi .write (f" The selected active r ranges are: { args1 .xrange } \n \n " )
172
+ fi .write (" Thesholding:\n " )
173
+ fi .write (f" \t The input component threshold was: { args1 .threshold } \n " )
174
+ fi .write (f" \t The input improvement threshold was: { args1 .improve_thresh } \n " )
175
+ fi .write (f" \t The input # of iterations to run was: { args1 .n_iter } \n " )
176
+ fi .write (f" \t Was PCA thresholding used?: { args1 .pca_thresh } \n " )
177
+ fi .write (f" { len (df_components .columns )} components were extracted" )
128
178
129
179
fig1 .savefig (plot_file1 )
130
180
fig2 .savefig (plot_file2 )
@@ -141,14 +191,21 @@ def tup(s):
141
191
data = np .column_stack ([df_components .index .to_list (), df_components [col ].to_list ()])
142
192
143
193
if args1 .xrd :
144
- np .savetxt (os .path .join (os .getcwd (), 'nmf_result' , output_fn + f'_comp{ i } ' + '.xy' ), data ,
145
- header = f"NMF Generated XRD\n Source = nmfMapping\n "
146
- f"Date = { output_fn } \n { args1 .x_units } Intensity\n " , fmt = '%s' ,
147
- comments = "' " )
194
+ np .savetxt (
195
+ os .path .join (os .getcwd (), "nmf_result" , output_fn + f"_comp{ i } " + ".xy" ),
196
+ data ,
197
+ header = f"NMF Generated XRD\n Source = nmfMapping\n "
198
+ f"Date = { output_fn } \n { args1 .x_units } Intensity\n " ,
199
+ fmt = "%s" ,
200
+ comments = "' " ,
201
+ )
148
202
else :
149
- np .savetxt (os .path .join (os .getcwd (), 'nmf_result' , output_fn + f'_comp{ i } ' + '.cgr' ), data ,
150
- header = f"NMF Generated PDF\n Source: nmfMapping\n "
151
- f"Date: { output_fn } \n r g" , fmt = '%s' )
203
+ np .savetxt (
204
+ os .path .join (os .getcwd (), "nmf_result" , output_fn + f"_comp{ i } " + ".cgr" ),
205
+ data ,
206
+ header = f"NMF Generated PDF\n Source: nmfMapping\n " f"Date: { output_fn } \n r g" ,
207
+ fmt = "%s" ,
208
+ )
152
209
153
210
154
211
if __name__ == "__main__" :
0 commit comments