Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .github/workflows/mlrun.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright 2021 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name: mlrun-project-workflow
on: [issue_comment]

Expand Down
14 changes: 14 additions & 0 deletions gen_iris.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright 2021 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pandas as pd
from sklearn.datasets import load_iris

Expand Down
84 changes: 49 additions & 35 deletions kflow.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,49 @@
import mlrun
from kfp import dsl

# Default value for kfpipeline's `model_pkg_class` argument.
default_pkg_class = "sklearn.linear_model.LogisticRegression"

# Location of the raw iris CSV; the same URL is registered as the `data`
# artifact in project.yaml.
iris_data = "https://s3.wasabisys.com/iguazio/data/iris/iris.data.raw.csv"

# Project object taken from MLRun's pipeline context.
project = mlrun.projects.pipeline_context.project

# Functions looked up by name inside kfpipeline. It starts empty here, so it
# is presumably filled in from outside this module before the pipeline is
# built -- TODO confirm against the workflow runner.
funcs = {}

@dsl.pipeline(
    name="Demo training pipeline",
    description="Shows how to use mlrun."
)
def kfpipeline(model_pkg_class=default_pkg_class, build=0):
    """Define the demo pipeline: optional build, data prep, train, test.

    Args:
        model_pkg_class: Forwarded to the train step as its
            ``model_pkg_class`` parameter.
        build: When equal to 1, the ``prep-data`` deploy step runs before
            the data-prep step.
    """
    # project.yaml declares this parameter as `label_column`. The older
    # `label` key is still honored as a fallback so existing projects keep
    # working, and 'label' remains the final default.
    label_column = project.get_param(
        'label_column', project.get_param('label', 'label')
    )

    # when build == 1, build the function image before the run
    with dsl.Condition(build == 1) as build_cond:
        funcs["prep-data"].deploy_step(skip_deployed=True)

    # run a local data prep function; it gets the same label column as the
    # later steps so all three stages agree on which column holds the label
    prep_data = funcs["prep-data"].as_step(
        name='prep_data',
        params={'label_column': label_column},
        inputs={'source_url': project.get_artifact_uri("data")},
        outputs=["cleaned_data"],
    ).after(build_cond)

    # train the model using a library (hub://) function and the generated data
    train = funcs["train"].as_step(
        name='train',
        inputs={'dataset': prep_data.outputs['cleaned_data']},
        params={
            'model_pkg_class': model_pkg_class,
            'label_column': label_column,
        },
        outputs=["model", "test_set"],
    )

    # test the model using a library (hub://) function and the generated model
    funcs["test"].as_step(
        name="test",
        params={"label_column": label_column},
        inputs={
            "models_path": train.outputs['model'],
            "test_set": train.outputs['test_set'],
        },
    )
# Copyright 2021 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import mlrun
from kfp import dsl

# Default value for kfpipeline's `model_pkg_class` argument.
default_pkg_class = "sklearn.linear_model.LogisticRegression"

# Location of the raw iris CSV; the same URL is registered as the `data`
# artifact in project.yaml.
iris_data = "https://s3.wasabisys.com/iguazio/data/iris/iris.data.raw.csv"

# Project object taken from MLRun's pipeline context.
project = mlrun.projects.pipeline_context.project

# Functions looked up by name inside kfpipeline. It starts empty here, so it
# is presumably filled in from outside this module before the pipeline is
# built -- TODO confirm against the workflow runner.
funcs = {}

@dsl.pipeline(
    name="Demo training pipeline",
    description="Shows how to use mlrun."
)
def kfpipeline(model_pkg_class=default_pkg_class, build=0):
    """Define the demo pipeline: optional build, data prep, train, test.

    Args:
        model_pkg_class: Forwarded to the train step as its
            ``model_pkg_class`` parameter.
        build: When equal to 1, the ``prep-data`` deploy step runs before
            the data-prep step.
    """
    # project.yaml declares this parameter as `label_column`. The older
    # `label` key is still honored as a fallback so existing projects keep
    # working, and 'label' remains the final default.
    label_column = project.get_param(
        'label_column', project.get_param('label', 'label')
    )

    # when build == 1, build the function image before the run
    with dsl.Condition(build == 1) as build_cond:
        funcs["prep-data"].deploy_step(skip_deployed=True)

    # run a local data prep function; it gets the same label column as the
    # later steps so all three stages agree on which column holds the label
    prep_data = funcs["prep-data"].as_step(
        name='prep_data',
        params={'label_column': label_column},
        inputs={'source_url': project.get_artifact_uri("data")},
        outputs=["cleaned_data"],
    ).after(build_cond)

    # train the model using a library (hub://) function and the generated data
    train = funcs["train"].as_step(
        name='train',
        inputs={'dataset': prep_data.outputs['cleaned_data']},
        params={
            'model_pkg_class': model_pkg_class,
            'label_column': label_column,
        },
        outputs=["model", "test_set"],
    )

    # test the model using a library (hub://) function and the generated model
    funcs["test"].as_step(
        name="test",
        params={"label_column": label_column},
        inputs={
            "models_path": train.outputs['model'],
            "test_set": train.outputs['test_set'],
        },
    )
14 changes: 14 additions & 0 deletions newflow.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright 2021 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from kfp import dsl

from mlrun import build_function, deploy_function, get_current_project, run_function
Expand Down
38 changes: 26 additions & 12 deletions prep_data.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,27 @@
import mlrun

def prep_data(context, source_url: mlrun.DataItem, label_column='label'):
# Convert the DataItem to a pandas DataFrame
df = source_url.as_df()
print("data url:", source_url.url)
df[label_column] = df[label_column].astype('category').cat.codes

# Record the DataFrame length after the run
context.log_result('num_rows', df.shape[0])

# Store the data set in your artifacts database
# Copyright 2021 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import mlrun

def prep_data(context, source_url: mlrun.DataItem, label_column='label'):
    """Encode the label column as integer category codes and log the result.

    Args:
        context: Run context; receives the ``num_rows`` result and the
            ``cleaned_data`` dataset.
        source_url: Data item that is loaded into a pandas DataFrame.
        label_column: Name of the column whose values are replaced by
            their category codes.
    """
    # Load the DataItem into a pandas DataFrame
    frame = source_url.as_df()
    print("data url:", source_url.url)

    # Swap the raw labels for their integer category codes
    as_category = frame[label_column].astype('category')
    frame[label_column] = as_category.cat.codes

    # Record the DataFrame length after the run
    row_count = frame.shape[0]
    context.log_result('num_rows', row_count)

    # Store the data set in your artifacts database
    context.log_dataset('cleaned_data', df=frame, index=False, format='csv')
96 changes: 55 additions & 41 deletions project.yaml
Original file line number Diff line number Diff line change
@@ -1,41 +1,55 @@
# MLRun project definition for the "pipe" project.
kind: project
metadata:
name: pipe
spec:
description: test
# Project-level parameters.
params:
label_column: label
# Functions registered in the project: two source files in this repository
# (prep_data.py, gen_iris.py) and three hub:// entries.
functions:
- url: prep_data.py
name: prep-data
image: mlrun/mlrun
handler: prep_data
with_repo: true
- url: hub://sklearn_classifier
name: train
- url: hub://test_classifier
name: test
- url: hub://v2_model_server
name: serve
- url: gen_iris.py
name: gen-iris
image: mlrun/mlrun
handler: iris_generator
requirements:
- requests
# Workflows: "main" points at kflow.py with the kfp engine; "newflow" points
# at newflow.py and names `newpipe` as its handler.
workflows:
- name: main
path: ./kflow.py
engine: kfp
- name: newflow
path: ./newflow.py
handler: newpipe
# Artifacts: the "data" key points at the raw iris CSV.
artifacts:
- kind: ''
target_path: https://s3.wasabisys.com/iguazio/data/iris/iris.data.raw.csv
key: data
source: ./
subpath: ''
origin_url: ''
load_source_on_run: true
desired_state: online
# Copyright 2021 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# MLRun project definition for the "pipe" project.
kind: project
metadata:
name: pipe
spec:
description: test
# Project-level parameters.
params:
label_column: label
# Functions registered in the project: two source files in this repository
# (prep_data.py, gen_iris.py) and three hub:// entries.
functions:
- url: prep_data.py
name: prep-data
image: mlrun/mlrun
handler: prep_data
with_repo: true
- url: hub://sklearn_classifier
name: train
- url: hub://test_classifier
name: test
- url: hub://v2_model_server
name: serve
- url: gen_iris.py
name: gen-iris
image: mlrun/mlrun
handler: iris_generator
requirements:
- requests
# Workflows: "main" points at kflow.py with the kfp engine; "newflow" points
# at newflow.py and names `newpipe` as its handler.
workflows:
- name: main
path: ./kflow.py
engine: kfp
- name: newflow
path: ./newflow.py
handler: newpipe
# Artifacts: the "data" key points at the raw iris CSV.
artifacts:
- kind: ''
target_path: https://s3.wasabisys.com/iguazio/data/iris/iris.data.raw.csv
key: data
source: ./
subpath: ''
origin_url: ''
load_source_on_run: true
desired_state: online