Skip to content

Nl2llm operator

NL2LLMOperator

Bases: Operator, ServiceClient

The NL2LLM operator processes a natural-language query using an LLM and returns structured data.

Attributes:

| Name | Type | Required | Default | Description |
|------|------|----------|---------|-------------|
| query | str | yes | - | Natural language query to process |
| context | str | no | "" | Optional context to provide domain knowledge |
| attrs | list[dict] | no | [] | List of attribute specifications (dicts with name and optional type) |
Source code in blue/operators/nl2llm_operator.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
class NL2LLMOperator(Operator, ServiceClient):
    """
    NL2LLM operator processes a natural language query using LLM models and returns structured data.

    Attributes:
    ----------
    | Name    | Type       | Required | Default | Description                                               |
    |---------|------------|----------|---------|-----------------------------------------------------------|
    | `query`   | str        | :fontawesome-solid-circle-check: {.green-check}      | -       | Natural language query to process                         |
    | `context` | str        |        | ""      | Optional context to provide domain knowledge             |
    | `attrs`   | list[dict] |        | []      | List of attribute specifications (dicts with name and optional type) |

    """

    # Prompt template; the ${query}, ${attrs} and ${context} placeholders match the keys that
    # nl2llm_operator_function passes as additional_data (substitution itself happens outside
    # this class — presumably in ServiceClient; confirm there).
    PROMPT = """
You are an intelligent system that converts a natural language query into a structured JSON output.

### General Requirements:
- Always output a **valid JSON array** of objects.
- Each element must be a well-formed JSON object with key–value pairs.
- Do **not** include explanations, comments, or additional text outside of the JSON.
- Strive to return non-empty output. If the query is vague, use best judgement to produce a meaningful result.

### Attributes:
- If output attributes are specified, every object must include them.
- If types are specified, ensure values match those types exactly (e.g., string, int, float, boolean, date, list, dict, etc.).
- If types are **not** specified, infer them reasonably from the query and context.
- If no attributes are provided, return the most relevant structured JSON representation of the query.

### Context:
- Use the provided context if it exists and is non-empty.

### Output Formatting:
- Return only the JSON array.
- No prose, explanations, or formatting (such as Markdown fences).
- Ensure strict JSON compliance (no trailing commas, keys quoted, etc.).

---

Query: ${query}

Attributes:
${attrs}

Context:
${context}

Output:
"""

    PROPERTIES = {
        # openai related properties
        "openai.api": "ChatCompletion",
        "openai.model": "gpt-4o",
        "openai.stream": False,
        "openai.max_tokens": 4096,
        "openai.temperature": 0,
        # io related properties
        "input_json": "[{\"role\": \"user\"}]",
        "input_context": "$[0]",
        "input_context_field": "content",
        "input_field": "messages",
        "input_template": PROMPT,
        "output_path": "$.choices[0].message.content",
        # service related properties
        "service_prefix": "openai",
        # output transformations
        # NOTE(review): these strip Markdown code fences and the "json" language tag from the
        # model reply. If "replace" substitutes every occurrence, the second rule would also
        # remove the text "json" from inside returned values — confirm against the
        # transformation implementation.
        "output_transformations": [{"transformation": "replace", "from": "```", "to": ""}, {"transformation": "replace", "from": "json", "to": ""}],
        "output_strip": True,
        "output_cast": "json",
    }

    name = "nl2llm"
    description = "Processes natural language query using LLM models and returns structured data"
    # Class-level attribute schema; treated as read-only defaults (instances receive a copy).
    default_attributes = {
        "query": {"type": "str", "description": "Natural language query to process", "required": True},
        "context": {"type": "str", "description": "Optional context to provide domain knowledge", "required": False, "default": ""},
        "attrs": {"type": "list[dict]", "description": "List of attribute specifications (dicts with name and optional type)", "required": False, "default": []},
    }

    def __init__(self, description: str = None, properties: Dict[str, Any] = None):
        """Create the operator and register its function, validator and explainer.

        Parameters:
            description: Optional description; falls back to the class-level `description` when falsy.
            properties: Optional properties dictionary forwarded unchanged to the base initializer.
        """
        super().__init__(
            self.name,
            function=nl2llm_operator_function,
            description=description or self.description,
            properties=properties,
            validator=nl2llm_operator_validator,
            explainer=nl2llm_operator_explainer,
        )

    def _initialize_properties(self):
        """Extend the base-class properties with the attribute schema and the default service URL."""
        # function-scope import so the file-level import block does not need to change
        import copy

        super()._initialize_properties()

        # attribute definitions
        # Deep-copied: default_attributes is a mutable class-level dict (and contains a list
        # default), so handing out the same object would let an in-place change made through
        # one instance's properties alter the defaults seen by every other instance.
        self.properties["attributes"] = copy.deepcopy(self.default_attributes)

        # service_url, set as default
        # NOTE: the bare name PROPERTIES below is resolved at module level (class-body names
        # are not visible from inside a method), so this reads the module-level PROPERTIES
        # defined outside this class, not NL2LLMOperator.PROPERTIES.
        self.properties["service_url"] = PROPERTIES["services.openai.service_url"]

nl2llm_operator_explainer(output, input_data, attributes)

Generate explanation for nl2llm operator execution.

Parameters:

Name Type Description Default
output Any

The output result from the operator execution.

required
input_data List[List[Dict[str, Any]]]

The input data that was processed.

required
attributes Dict[str, Any]

The attributes used for the operation.

required

Returns:

Type Description
Dict[str, Any]

Dictionary containing explanation of the operation.

Source code in blue/operators/nl2llm_operator.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def nl2llm_operator_explainer(output: Any, input_data: List[List[Dict[str, Any]]], attributes: Dict[str, Any]) -> Dict[str, Any]:
    """Build the explanation record for one nl2llm operator run.

    Parameters:
        output: Result produced by the operator execution.
        input_data: Input data that was processed; accepted as part of the explainer signature but not included in the explanation.
        attributes: Attributes the operator was invoked with.

    Returns:
        Dictionary with the keys `output` and `attributes`, holding the given objects as-is.
    """
    # The explanation simply echoes what came out and which attributes produced it.
    explanation: Dict[str, Any] = dict(output=output, attributes=attributes)
    return explanation

nl2llm_operator_function(input_data, attributes, properties=None)

Process natural language query using LLM models and return structured data.

Parameters:

Name Type Description Default
input_data List[List[Dict[str, Any]]]

List of JSON arrays (List[List[Dict[str, Any]]]), not used for query processing.

required
attributes Dict[str, Any]

Dictionary containing query parameters including query, context, and attrs.

required
properties Dict[str, Any]

Optional properties dictionary containing service configuration. Defaults to None.

None

Returns:

Type Description
List[List[Dict[str, Any]]]

List containing structured data results from the natural language query.

Source code in blue/operators/nl2llm_operator.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def nl2llm_operator_function(input_data: List[List[Dict[str, Any]]], attributes: Dict[str, Any], properties: Dict[str, Any] = None) -> List[List[Dict[str, Any]]]:
    """Turn a natural language query into structured data via the LLM service.

    Parameters:
        input_data: List of JSON arrays (List[List[Dict[str, Any]]]); ignored by this operator.
        attributes: Operator attributes; `query`, `context` and `attrs` are read from it.
        properties: Optional properties dictionary handed to the service client and the API call. Defaults to None.

    Returns:
        An empty list when the query is missing, empty or whitespace-only; otherwise a
        one-element list wrapping the result of the service call.
    """
    nl_query = attributes.get('query', '')

    # Guard clause: without a usable query there is nothing to send to the service.
    if not (nl_query and nl_query.strip()):
        return []

    domain_context = attributes.get('context', '')
    attr_specs = attributes.get('attrs', [])

    # Render the attribute specifications as a bulleted section for the prompt;
    # the section stays empty when no attributes were requested.
    if attr_specs:
        bullet_lines = ["Required output attributes:"]
        for spec in attr_specs:
            type_text = spec['type'] if 'type' in spec else "(type will be inferred)"
            bullet_lines.append(f"- {spec['name']}: {type_text}")
        attrs_section = "\n".join(bullet_lines) + "\n"
    else:
        attrs_section = ""

    client = ServiceClient(name="nl2llm_operator_service_client", properties=properties)
    template_values = {'query': nl_query, 'context': domain_context, 'attrs': attrs_section}

    # The request payload is passed empty; the query data travels in additional_data.
    response = client.execute_api_call({}, properties=properties, additional_data=template_values)
    return [response]

nl2llm_operator_validator(input_data, attributes, properties=None)

Validate nl2llm operator attributes.

Parameters:

Name Type Description Default
input_data List[List[Dict[str, Any]]]

List of JSON arrays (List[List[Dict[str, Any]]]) to validate.

required
attributes Dict[str, Any]

Dictionary containing operator attributes to validate.

required
properties Dict[str, Any]

Optional properties dictionary. Defaults to None.

None

Returns:

Type Description
bool

True if attributes are valid, False otherwise.

Source code in blue/operators/nl2llm_operator.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def nl2llm_operator_validator(input_data: List[List[Dict[str, Any]]], attributes: Dict[str, Any], properties: Dict[str, Any] = None) -> bool:
    """Check that the attributes given to the nl2llm operator are usable.

    Parameters:
        input_data: List of JSON arrays (List[List[Dict[str, Any]]]) passed on to the default validator.
        attributes: Operator attributes to validate.
        properties: Optional properties dictionary passed on to the default validator. Defaults to None.

    Returns:
        False when the default validator rejects the input or when any entry of a
        non-empty `attrs` is not a dict containing a 'name' key; True otherwise.
    """
    # Generic operator checks come first; their failure settles the result.
    if not default_operator_validator(input_data, attributes, properties):
        return False

    # A missing or empty attrs value needs no further inspection.
    if 'attrs' not in attributes or not attributes['attrs']:
        return True

    # Every attribute specification has to be a dict that names the attribute.
    return all(isinstance(spec, dict) and 'name' in spec for spec in attributes['attrs'])
Last update: 2025-10-08