diff --git a/.github/workflows/mlrun.yml b/.github/workflows/mlrun.yml index 3606743..36254f6 100644 --- a/.github/workflows/mlrun.yml +++ b/.github/workflows/mlrun.yml @@ -1,3 +1,17 @@ +# Copyright 2021 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# name: mlrun-project-workflow on: [issue_comment] diff --git a/gen_iris.py b/gen_iris.py index ea50fe3..883b3da 100644 --- a/gen_iris.py +++ b/gen_iris.py @@ -1,3 +1,17 @@ +# Copyright 2021 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# import pandas as pd from sklearn.datasets import load_iris diff --git a/kflow.py b/kflow.py index 621a400..78c693c 100644 --- a/kflow.py +++ b/kflow.py @@ -1,35 +1,49 @@ -import mlrun -from kfp import dsl - -funcs = {} -project = mlrun.projects.pipeline_context.project -iris_data = 'https://s3.wasabisys.com/iguazio/data/iris/iris.data.raw.csv' -default_pkg_class = "sklearn.linear_model.LogisticRegression" - -@dsl.pipeline( - name="Demo training pipeline", - description="Shows how to use mlrun." -) -def kfpipeline(model_pkg_class=default_pkg_class, build=0): - - # if build=True, build the function image before the run - with dsl.Condition(build==1) as build_cond: - funcs["prep-data"].deploy_step(skip_deployed=True) - - # run a local data prep function - prep_data = funcs["prep-data"].as_step(name='prep_data', - inputs={'source_url': project.get_artifact_uri("data")}, - outputs=["cleaned_data"]).after(build_cond) - - # train the model using a library (hub://) function and the generated data - train = funcs["train"].as_step(name='train', - inputs={'dataset': prep_data.outputs['cleaned_data']}, - params={'model_pkg_class': model_pkg_class, - 'label_column': project.get_param('label', 'label')}, - outputs=["model", "test_set"]) - - # test the model using a library (hub://) function and the generated model - test = funcs["test"].as_step(name="test", - params={"label_column": "label"}, - inputs={"models_path": train.outputs['model'], - "test_set": train.outputs['test_set']}) +# Copyright 2021 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +import mlrun +from kfp import dsl + +funcs = {} +project = mlrun.projects.pipeline_context.project +iris_data = 'https://s3.wasabisys.com/iguazio/data/iris/iris.data.raw.csv' +default_pkg_class = "sklearn.linear_model.LogisticRegression" + +@dsl.pipeline( + name="Demo training pipeline", + description="Shows how to use mlrun." +) +def kfpipeline(model_pkg_class=default_pkg_class, build=0): + + # if build=True, build the function image before the run + with dsl.Condition(build==1) as build_cond: + funcs["prep-data"].deploy_step(skip_deployed=True) + + # run a local data prep function + prep_data = funcs["prep-data"].as_step(name='prep_data', + inputs={'source_url': project.get_artifact_uri("data")}, + outputs=["cleaned_data"]).after(build_cond) + + # train the model using a library (hub://) function and the generated data + train = funcs["train"].as_step(name='train', + inputs={'dataset': prep_data.outputs['cleaned_data']}, + params={'model_pkg_class': model_pkg_class, + 'label_column': project.get_param('label', 'label')}, + outputs=["model", "test_set"]) + + # test the model using a library (hub://) function and the generated model + test = funcs["test"].as_step(name="test", + params={"label_column": "label"}, + inputs={"models_path": train.outputs['model'], + "test_set": train.outputs['test_set']}) diff --git a/newflow.py b/newflow.py index 738007d..60a5f06 100644 --- a/newflow.py +++ b/newflow.py @@ -1,3 +1,17 @@ +# Copyright 2021 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# from kfp import dsl from mlrun import build_function, deploy_function, get_current_project, run_function diff --git a/prep_data.py b/prep_data.py index a9412ed..57080d9 100644 --- a/prep_data.py +++ b/prep_data.py @@ -1,13 +1,27 @@ -import mlrun - -def prep_data(context, source_url: mlrun.DataItem, label_column='label'): - # Convert the DataItem to a pandas DataFrame - df = source_url.as_df() - print("data url:", source_url.url) - df[label_column] = df[label_column].astype('category').cat.codes - - # Record the DataFrane length after the run - context.log_result('num_rows', df.shape[0]) - - # Store the data set in your artifacts database +# Copyright 2021 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import mlrun + +def prep_data(context, source_url: mlrun.DataItem, label_column='label'): + # Convert the DataItem to a pandas DataFrame + df = source_url.as_df() + print("data url:", source_url.url) + df[label_column] = df[label_column].astype('category').cat.codes + + # Record the DataFrame length after the run + context.log_result('num_rows', df.shape[0]) + + # Store the data set in your artifacts database context.log_dataset('cleaned_data', df=df, index=False, format='csv') \ No newline at end of file diff --git a/project.yaml b/project.yaml index f4ed905..c53a493 100644 --- a/project.yaml +++ b/project.yaml @@ -1,41 +1,55 @@ -kind: project -metadata: - name: pipe -spec: - description: test - params: - label_column: label - functions: - - url: prep_data.py - name: prep-data - image: mlrun/mlrun - handler: prep_data - with_repo: true - - url: hub://sklearn_classifier - name: train - - url: hub://test_classifier - name: test - - url: hub://v2_model_server - name: serve - - url: gen_iris.py - name: gen-iris - image: mlrun/mlrun - handler: iris_generator - requirements: - - requests - workflows: - - name: main - path: ./kflow.py - engine: kfp - - name: newflow - path: ./newflow.py - handler: newpipe - artifacts: - - kind: '' - target_path: https://s3.wasabisys.com/iguazio/data/iris/iris.data.raw.csv - key: data - source: ./ - subpath: '' - origin_url: '' - load_source_on_run: true - desired_state: online +# Copyright 2021 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +kind: project +metadata: + name: pipe +spec: + description: test + params: + label_column: label + functions: + - url: prep_data.py + name: prep-data + image: mlrun/mlrun + handler: prep_data + with_repo: true + - url: hub://sklearn_classifier + name: train + - url: hub://test_classifier + name: test + - url: hub://v2_model_server + name: serve + - url: gen_iris.py + name: gen-iris + image: mlrun/mlrun + handler: iris_generator + requirements: + - requests + workflows: + - name: main + path: ./kflow.py + engine: kfp + - name: newflow + path: ./newflow.py + handler: newpipe + artifacts: + - kind: '' + target_path: https://s3.wasabisys.com/iguazio/data/iris/iris.data.raw.csv + key: data + source: ./ + subpath: '' + origin_url: '' + load_source_on_run: true + desired_state: online