@@ -1564,6 +1564,42 @@ def submit_evaluation_for(
15641564 timestamp_ms : Optional [int ] = None ,
15651565 metadata : Optional [Dict [str , object ]] = None ,
15661566 assessment : Optional [str ] = None ,
1567+ ) -> None :
1568+ """
1569+ Submits a custom evaluation metric for a given span. This method is deprecated and will be
1570+ removed in the next major version of ddtrace (4.0). Please use `LLMObs.submit_evaluation()` instead.
1571+ """
1572+ log .warning (
1573+ "LLMObs.submit_evaluation_for() is deprecated and will be removed in the next major "
1574+ "version of ddtrace (4.0). Please use LLMObs.submit_evaluation() instead."
1575+ )
1576+ return cls .submit_evaluation (
1577+ label = label ,
1578+ metric_type = metric_type ,
1579+ value = value ,
1580+ span = span ,
1581+ span_with_tag_value = span_with_tag_value ,
1582+ tags = tags ,
1583+ ml_app = ml_app ,
1584+ timestamp_ms = timestamp_ms ,
1585+ metadata = metadata ,
1586+ assessment = assessment ,
1587+ )
1588+
1589+ @classmethod
1590+ def submit_evaluation (
1591+ cls ,
1592+ label : str ,
1593+ metric_type : str ,
1594+ value : Union [str , int , float , bool ],
1595+ span_context : Optional [Dict [str , str ]] = None ,
1596+ span : Optional [dict ] = None ,
1597+ span_with_tag_value : Optional [Dict [str , str ]] = None ,
1598+ tags : Optional [Dict [str , str ]] = None ,
1599+ ml_app : Optional [str ] = None ,
1600+ timestamp_ms : Optional [int ] = None ,
1601+ metadata : Optional [Dict [str , object ]] = None ,
1602+ assessment : Optional [str ] = None ,
15671603 ) -> None :
15681604 """
15691605 Submits a custom evaluation metric for a given span.
@@ -1572,6 +1608,9 @@ def submit_evaluation_for(
15721608 :param str metric_type: The type of the evaluation metric. One of "categorical", "score", "boolean".
15731609 :param value: The value of the evaluation metric.
15741610 Must be a string (categorical), integer (score), float (score), or boolean (boolean).
1611+ :param dict span_context: A dictionary containing the span_id and trace_id of interest. This is a
1612+ deprecated parameter and will be removed in the next major version of
1613+ ddtrace (4.0). Please use `span` or `span_with_tag_value` instead.
15751614 :param dict span: A dictionary of shape {'span_id': str, 'trace_id': str} uniquely identifying
15761615 the span associated with this evaluation.
15771616 :param dict span_with_tag_value: A dictionary with the format {'tag_key': str, 'tag_value': str}
@@ -1584,9 +1623,16 @@ def submit_evaluation_for(
15841623 evaluation metric.
15851624 :param str assessment: An assessment of the validity of this evaluation. Must be either "pass" or "fail".
15861625 """
1626+ if span_context is not None :
1627+ log .warning (
1628+ "The `span_context` parameter is deprecated and will be removed in the next major version of "
1629+ "ddtrace (4.0). Please use `span` or `span_with_tag_value` instead."
1630+ )
1631+ span = span or span_context
1632+
15871633 if cls .enabled is False :
15881634 log .debug (
1589- "LLMObs.submit_evaluation_for () called when LLMObs is not enabled. " ,
1635+ "LLMObs.submit_evaluation () called when LLMObs is not enabled. " ,
15901636 "Evaluation metric data will not be sent." ,
15911637 )
15921638 return
@@ -1659,6 +1705,15 @@ def submit_evaluation_for(
16591705 log .warning ("tags must be a dictionary of string key-value pairs." )
16601706 tags = {}
16611707
1708+ ml_app = ml_app if ml_app else config ._llmobs_ml_app
1709+ if not ml_app :
1710+ error = "missing_ml_app"
1711+ log .warning (
1712+ "ML App name is required for sending evaluation metrics. Evaluation metric data will not be sent. "
1713+ "Ensure this configuration is set before running your application."
1714+ )
1715+ return
1716+
16621717 evaluation_tags = {
16631718 "ddtrace.version" : ddtrace .__version__ ,
16641719 "ml_app" : ml_app ,
@@ -1672,15 +1727,6 @@ def submit_evaluation_for(
16721727 error = "invalid_tags"
16731728 log .warning ("Failed to parse tags. Tags for evaluation metrics must be strings." )
16741729
1675- ml_app = ml_app if ml_app else config ._llmobs_ml_app
1676- if not ml_app :
1677- error = "missing_ml_app"
1678- log .warning (
1679- "ML App name is required for sending evaluation metrics. Evaluation metric data will not be sent. "
1680- "Ensure this configuration is set before running your application."
1681- )
1682- return
1683-
16841730 evaluation_metric : LLMObsEvaluationMetricEvent = {
16851731 "join_on" : join_on ,
16861732 "label" : str (label ),
@@ -1711,144 +1757,6 @@ def submit_evaluation_for(
17111757 finally :
17121758 telemetry .record_llmobs_submit_evaluation (join_on , metric_type , error )
17131759
1714- @classmethod
1715- def submit_evaluation (
1716- cls ,
1717- span_context : Dict [str , str ],
1718- label : str ,
1719- metric_type : str ,
1720- value : Union [str , int , float , bool ],
1721- tags : Optional [Dict [str , str ]] = None ,
1722- ml_app : Optional [str ] = None ,
1723- timestamp_ms : Optional [int ] = None ,
1724- metadata : Optional [Dict [str , object ]] = None ,
1725- ) -> None :
1726- """
1727- Submits a custom evaluation metric for a given span ID and trace ID.
1728-
1729- :param span_context: A dictionary containing the span_id and trace_id of interest.
1730- :param str label: The name of the evaluation metric.
1731- :param str metric_type: The type of the evaluation metric. One of "categorical", "score", "boolean".
1732- :param value: The value of the evaluation metric.
1733- Must be a string (categorical), integer (score), float (score), or boolean (boolean).
1734- :param tags: A dictionary of string key-value pairs to tag the evaluation metric with.
1735- :param str ml_app: The name of the ML application
1736- :param int timestamp_ms: The timestamp in milliseconds when the evaluation metric result was generated.
1737- :param dict metadata: A JSON serializable dictionary of key-value metadata pairs relevant to the
1738- evaluation metric.
1739- """
1740- if cls .enabled is False :
1741- log .debug (
1742- "LLMObs.submit_evaluation() called when LLMObs is not enabled. Evaluation metric data will not be sent."
1743- )
1744- return
1745- error = None
1746- try :
1747- if not isinstance (span_context , dict ):
1748- error = "invalid_span"
1749- log .warning (
1750- "span_context must be a dictionary containing both span_id and trace_id keys. "
1751- "LLMObs.export_span() can be used to generate this dictionary from a given span."
1752- )
1753- return
1754-
1755- ml_app = ml_app if ml_app else config ._llmobs_ml_app
1756- if not ml_app :
1757- error = "missing_ml_app"
1758- log .warning (
1759- "ML App name is required for sending evaluation metrics. Evaluation metric data will not be sent. "
1760- "Ensure this configuration is set before running your application."
1761- )
1762- return
1763-
1764- timestamp_ms = timestamp_ms if timestamp_ms else int (time .time () * 1000 )
1765-
1766- if not isinstance (timestamp_ms , int ) or timestamp_ms < 0 :
1767- error = "invalid_timestamp"
1768- log .warning ("timestamp_ms must be a non-negative integer. Evaluation metric data will not be sent" )
1769- return
1770-
1771- span_id = span_context .get ("span_id" )
1772- trace_id = span_context .get ("trace_id" )
1773- if not (span_id and trace_id ):
1774- error = "invalid_span"
1775- log .warning (
1776- "span_id and trace_id must both be specified for the given evaluation metric to be submitted."
1777- )
1778- return
1779- if not label :
1780- error = "invalid_metric_label"
1781- log .warning ("label must be the specified name of the evaluation metric." )
1782- return
1783-
1784- if not metric_type or metric_type .lower () not in ("categorical" , "numerical" , "score" , "boolean" ):
1785- error = "invalid_metric_type"
1786- log .warning ("metric_type must be one of 'categorical', 'score', or 'boolean'." )
1787- return
1788-
1789- metric_type = metric_type .lower ()
1790- if metric_type == "numerical" :
1791- error = "invalid_metric_type"
1792- log .warning (
1793- "The evaluation metric type 'numerical' is unsupported. Use 'score' instead. "
1794- "Converting `numerical` metric to `score` type."
1795- )
1796- metric_type = "score"
1797-
1798- if metric_type == "categorical" and not isinstance (value , str ):
1799- error = "invalid_metric_value"
1800- log .warning ("value must be a string for a categorical metric." )
1801- return
1802- if metric_type == "score" and not isinstance (value , (int , float )):
1803- error = "invalid_metric_value"
1804- log .warning ("value must be an integer or float for a score metric." )
1805- return
1806- if metric_type == "boolean" and not isinstance (value , bool ):
1807- error = "invalid_metric_value"
1808- log .warning ("value must be a boolean for a boolean metric." )
1809- return
1810- if tags is not None and not isinstance (tags , dict ):
1811- error = "invalid_tags"
1812- log .warning ("tags must be a dictionary of string key-value pairs." )
1813- return
1814-
1815- # initialize tags with default values that will be overridden by user-provided tags
1816- evaluation_tags = {
1817- "ddtrace.version" : ddtrace .__version__ ,
1818- "ml_app" : ml_app ,
1819- }
1820-
1821- if tags :
1822- for k , v in tags .items ():
1823- try :
1824- evaluation_tags [ensure_text (k )] = ensure_text (v )
1825- except TypeError :
1826- error = "invalid_tags"
1827- log .warning ("Failed to parse tags. Tags for evaluation metrics must be strings." )
1828-
1829- evaluation_metric : LLMObsEvaluationMetricEvent = {
1830- "join_on" : {"span" : {"span_id" : span_id , "trace_id" : trace_id }},
1831- "label" : str (label ),
1832- "metric_type" : metric_type .lower (),
1833- "timestamp_ms" : timestamp_ms ,
1834- "{}_value" .format (metric_type ): value , # type: ignore
1835- "ml_app" : ml_app ,
1836- "tags" : ["{}:{}" .format (k , v ) for k , v in evaluation_tags .items ()],
1837- }
1838-
1839- if metadata :
1840- if not isinstance (metadata , dict ):
1841- error = "invalid_metadata"
1842- log .warning ("metadata must be json serializable dictionary." )
1843- else :
1844- metadata = safe_json (metadata )
1845- if metadata and isinstance (metadata , str ):
1846- evaluation_metric ["metadata" ] = json .loads (metadata )
1847-
1848- cls ._instance ._llmobs_eval_metric_writer .enqueue (evaluation_metric )
1849- finally :
1850- telemetry .record_llmobs_submit_evaluation ({"span" : span_context }, metric_type , error )
1851-
18521760 @classmethod
18531761 def _inject_llmobs_context (cls , span_context : Context , request_headers : Dict [str , str ]) -> None :
18541762 if cls .enabled is False :
0 commit comments