Skip to content

Summarizer

SummarizerAgent

Bases: OpenAIAgent

An agent that summarizes input text using OpenAI's language models, incorporating results from natural language and SQL queries.

Properties (in addition to OpenAIAgent properties):

Name Type Default Description
template str "" The template string used to generate the summary, which can include placeholders for query results.
questions dict {} A dictionary of natural language questions to be asked as part of the summarization process.
queries dict {} A dictionary of SQL queries to be executed as part of the summarization process.
rephrase bool True Whether to rephrase the generated summary for improved readability.

Inputs:

- DEFAULT: The main input stream where the agent receives text to summarize.

Outputs:

- DEFAULT: The output stream where the summary text is sent, tagged as SUMMARY.

Source code in blue/agents/summarizer.py
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
class SummarizerAgent(OpenAIAgent):
    """An agent that summarizes input text using OpenAI's language models, incorporating results from natural language and SQL queries.

    Properties (in addition to OpenAIAgent properties):
    ----------
    | Name           | Type                 | Default | Description |
    |----------------|--------------------|----------|---------|
    | `template`      | `str`                | `""`      | The template string used to generate the summary, which can include placeholders for query results. |
    | `questions`     | `dict`               | `{}`      | A dictionary of natural language questions to be asked as part of the summarization process. |
    | `queries`       | `dict`               | `{}`      | A dictionary of SQL queries to be executed as part of the summarization process. |
    | `rephrase`      | `bool`               | `True`    | Whether to rephrase the generated summary for improved readability. |

    Inputs:
    - `DEFAULT`: The main input stream where the agent receives text to summarize.

    Outputs:
    - `DEFAULT`: The output stream where the summary text is sent, tagged as SUMMARY.
    """

    def __init__(self, **kwargs):
        # Default the agent name unless the caller supplied one.
        kwargs.setdefault("name", "SUMMARIZER")
        super().__init__(**kwargs)

    def _initialize(self, properties=None):
        super()._initialize(properties=properties)

        # additional initialization

    def _initialize_properties(self):
        super()._initialize_properties()

        # Layer this module's defaults on top of the inherited properties.
        for key, value in agent_properties.items():
            self.properties[key] = value

    ####### inputs / outputs
    def _initialize_inputs(self):
        """Initialize input parameters for the summarizer agent. Declares nothing here."""
        return

    def _initialize_outputs(self):
        """Initialize outputs for the summarizer agent, tagged as SUMMARY."""
        self.add_output("DEFAULT", description="summary text incorporating query results", tags=["SUMMARY"])

    def _issue_plan(self, payload, label, to_agent, progress_id, name, worker, to_param_prefix):
        """Report progress, then submit a plan routing `payload` through `to_agent` back to this agent."""
        if worker is None:
            worker = self.create_worker(None)

        if progress_id is None:
            progress_id = worker.sid

        # progress
        worker.write_progress(progress_id=progress_id, label=label + payload, value=self.current_step / self.num_steps)

        # plan: input -> to_agent -> this agent's "<to_param_prefix><name>" input
        plan = AgenticPlan(scope=worker.prefix)
        plan.define_input(name, value=payload)
        plan.connect_input_to_agent(from_input=name, to_agent=to_agent)
        plan.connect_agent_to_agent(from_agent=to_agent, to_agent=self.name, to_agent_input=to_param_prefix + name)

        # submit plan
        plan.submit(worker)

    def issue_nl_query(self, question, progress_id=None, name=None, worker=None, to_param_prefix="QUESTION_RESULTS_"):
        """Issue a natural language question to the NL2SQL agent as part of the summarization process.

        Parameters:
            question: The natural language question to ask.
            progress_id: An optional progress identifier for tracking. Defaults to the worker's `sid`.
            name: The name of the question. It is appended to `to_param_prefix` to form the
                input on which the answer comes back, so it must be a string in practice.
            worker: The worker handling the processing. A new one is created when omitted.
            to_param_prefix: The prefix for the output parameter name.

        Returns:
            None
        """
        self._issue_plan(question, 'Issuing question:', "NL2SQL", progress_id, name, worker, to_param_prefix)

    def issue_sql_query(self, query, progress_id=None, name=None, worker=None, to_param_prefix="QUERY_RESULTS_"):
        """Issue a SQL query to the QueryExecutor agent as part of the summarization process.

        Parameters:
            query: The SQL query to execute.
            progress_id: An optional progress identifier for tracking. Defaults to the worker's `sid`.
            name: The name of the query. It is appended to `to_param_prefix` to form the
                input on which the result comes back, so it must be a string in practice.
            worker: The worker handling the processing. A new one is created when omitted.
            to_param_prefix: The prefix for the output parameter name.
        """
        self._issue_plan(query, 'Issuing query:', "QUERYEXECUTOR", progress_id, name, worker, to_param_prefix)

    def summarize_doc(self, progress_id=None, properties=None, input="", worker=None):
        """Summarize the input document using the configured template and query results.

        Fills `properties['template']` from the collected results, the worker's session
        data and the input text, optionally rephrases it through `execute_api_call`,
        then writes the text followed by EOS and a final progress update.

        Parameters:
            progress_id: An optional progress identifier for tracking. Defaults to the worker's `sid`.
            properties: Additional properties for processing. Defaults to the agent's properties.
            input: The input text to summarize.
            worker: The worker handling the processing. A new one is created when omitted.
        """
        if worker is None:
            worker = self.create_worker(None)

        if progress_id is None:
            progress_id = worker.sid

        if properties is None:
            properties = self.properties

        # progress
        worker.write_progress(progress_id=progress_id, label='Summarizing doc...', value=self.current_step / self.num_steps)

        session_data = worker.get_all_session_data() or {}

        # Merge into one mapping first: unpacking several dicts straight into the call
        # raises TypeError as soon as two of them share a key (or one has an "input" key).
        values = {**self.results, **session_data, "input": input}
        summary = string_utils.safe_substitute(properties['template'], **values)

        if properties.get('rephrase'):
            # progress
            worker.write_progress(progress_id=progress_id, label='Rephrasing doc...', value=self.current_step / self.num_steps)

            session_data = self.session.get_all_data()

            #### call api to rephrase summary
            summary = self.execute_api_call(summary, properties=properties, additional_data=session_data)

        worker.write_data(summary)
        worker.write_eos()

        # progress, done
        worker.write_progress(progress_id=progress_id, label='Done...', value=1.0)

    def _start_summarization(self, stream, properties, worker):
        """Handle end of user input: store the text, then issue every configured question and query."""
        self.progress_id = stream

        # An EOS without a preceding BOS leaves no buffer; treat that as empty input.
        chunks = worker.get_data(stream) or []
        input_data = " ".join(chunks)
        worker.set_data("input", input_data)

        session_data = worker.get_all_session_data() or {}

        # user initiated summarizer, kick off queries from template
        self.results = {}
        self.todos = set()
        self.current_step = 0

        questions = self.properties.get('questions') or {}
        queries = self.properties.get('queries') or {}

        # one step per question/query plus the final summarization step
        self.num_steps = 1 + len(questions) + len(queries)

        # Single merged mapping avoids "multiple values for keyword argument" errors.
        values = {**self.properties, **session_data, "input": input_data}

        rendered_questions = {}
        for question_name, q in questions.items():
            rendered_questions[question_name] = string_utils.safe_substitute(q, **values)

        rendered_queries = {}
        for query_name, q in queries.items():
            q = json.dumps(q) if isinstance(q, dict) else str(q)
            rendered_queries[query_name] = string_utils.safe_substitute(q, **values)

        # Register every pending name before issuing anything, so an early result
        # cannot empty the set while later requests are still being sent.
        self.todos.update(rendered_questions, rendered_queries)

        if not self.todos:
            # Nothing to wait for: no result message would ever trigger the summary.
            self.summarize_doc(properties=properties, input=input_data, worker=worker, progress_id=self.progress_id)
            return

        # nl questions
        for question_name, question in rendered_questions.items():
            self.issue_nl_query(question, name=question_name, worker=worker, progress_id=self.progress_id)

        # db queries
        for query_name, query in rendered_queries.items():
            self.issue_sql_query(query, name=query_name, worker=worker, progress_id=self.progress_id)

    def _collect_result(self, message, name, properties, worker):
        """Record the result for `name`; summarize once nothing is pending anymore."""
        if not message.isData():
            return

        data = message.getData()

        if 'result' not in data:
            self.logger.info("nothing found")
            return

        # A duplicate or unexpected result must neither crash nor re-trigger the summary.
        if name not in getattr(self, "todos", ()):
            self.logger.info("ignoring unexpected result for: " + name)
            return

        self.todos.remove(name)
        self.results[name] = data['result']
        # one more step finished, so later progress reports move forward
        self.current_step += 1

        # all results received
        if not self.todos:
            input_data = worker.get_data("input") if worker else None
            if input_data is None:
                input_data = ""
            self.summarize_doc(properties=properties, input=input_data, worker=worker, progress_id=self.progress_id)

    def default_processor(self, message, input="DEFAULT", properties=None, worker=None):
        """Process messages for the summarizer agent, incorporating results from natural language and SQL queries to generate a summary.

        On the `DEFAULT` input, data is buffered per stream between BOS and EOS; at EOS the
        configured questions and queries are issued. On `QUERY_RESULTS_<name>` and
        `QUESTION_RESULTS_<name>` inputs, the result is stored and the summary is produced
        once every issued request has been answered.

        Parameters:
            message: The incoming message to process.
            input: The input stream name. Defaults to "DEFAULT".
            properties: Additional properties for processing.
            worker: The worker handling the processing.

        Returns:
            None
        """
        ##### Upon USER input text
        if input == "DEFAULT":
            if message.isEOS():
                # Without a worker there is no buffered input to read.
                if worker:
                    self._start_summarization(message.getStream(), properties, worker)
            elif message.isBOS():
                # init private stream data to empty array
                if worker:
                    worker.set_data(message.getStream(), [])
            elif message.isData():
                # append to private stream data
                if worker:
                    worker.append_data(message.getStream(), message.getData())

        elif input.startswith("QUERY_RESULTS_"):
            self._collect_result(message, input[len("QUERY_RESULTS_"):], properties, worker)
        elif input.startswith("QUESTION_RESULTS_"):
            self._collect_result(message, input[len("QUESTION_RESULTS_"):], properties, worker)

default_processor(message, input='DEFAULT', properties=None, worker=None)

Process messages for the summarizer agent, incorporating results from natural language and SQL queries to generate a summary.

Parameters:

Name Type Description Default
message

The incoming message to process.

required
input

The input stream name. Defaults to "DEFAULT".

'DEFAULT'
properties

Additional properties for processing.

None
worker

The worker handling the processing.

None

Returns:

Type Description

None or a response message.

Source code in blue/agents/summarizer.py
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
def default_processor(self, message, input="DEFAULT", properties=None, worker=None):
    """Process messages for the summarizer agent, incorporating results from natural language and SQL queries to generate a summary.

    On the `DEFAULT` input, data is buffered per stream between BOS and EOS; at EOS the
    configured questions and queries are issued. On `QUERY_RESULTS_<name>` and
    `QUESTION_RESULTS_<name>` inputs, the result is stored and the summary is produced
    once every issued request has been answered.

    Parameters:
        message: The incoming message to process.
        input: The input stream name. Defaults to "DEFAULT".
        properties: Additional properties for processing.
        worker: The worker handling the processing.

    Returns:
        None
    """
    ##### Upon USER input text
    if input == "DEFAULT":
        if message.isEOS():
            # Without a worker there is no buffered input to read.
            if not worker:
                return

            # get all data received from user stream
            stream = message.getStream()
            self.progress_id = stream

            # An EOS without a preceding BOS leaves no buffer; treat that as empty input.
            stream_data = worker.get_data(stream) or []
            input_data = " ".join(stream_data)
            worker.set_data("input", input_data)

            session_data = worker.get_all_session_data() or {}

            # user initiated summarizer, kick off queries from template
            self.results = {}
            self.todos = set()
            self.current_step = 0

            questions = self.properties.get('questions') or {}
            queries = self.properties.get('queries') or {}

            # one step per question/query plus the final summarization step
            self.num_steps = 1 + len(questions) + len(queries)

            # Merge into one mapping first: unpacking several dicts straight into the call
            # raises TypeError as soon as two of them share a key (or one has an "input" key).
            values = {**self.properties, **session_data, "input": input_data}

            rendered_questions = {}
            for question_name, q in questions.items():
                rendered_questions[question_name] = string_utils.safe_substitute(q, **values)

            rendered_queries = {}
            for query_name, q in queries.items():
                q = json.dumps(q) if isinstance(q, dict) else str(q)
                rendered_queries[query_name] = string_utils.safe_substitute(q, **values)

            # Register every pending name before issuing anything, so an early result
            # cannot empty the set while later requests are still being sent.
            self.todos.update(rendered_questions, rendered_queries)

            if not self.todos:
                # Nothing to wait for: no result message would ever trigger the summary.
                self.summarize_doc(properties=properties, input=input_data, worker=worker, progress_id=self.progress_id)
                return

            # nl questions
            for question_name, question in rendered_questions.items():
                self.issue_nl_query(question, name=question_name, worker=worker, progress_id=self.progress_id)

            # db queries
            for query_name, query in rendered_queries.items():
                self.issue_sql_query(query, name=query_name, worker=worker, progress_id=self.progress_id)
            return

        elif message.isBOS():
            # init private stream data to empty array
            if worker:
                worker.set_data(message.getStream(), [])
        elif message.isData():
            # append to private stream data
            if worker:
                worker.append_data(message.getStream(), message.getData())

    else:
        # Query and question results are handled identically; only the prefix differs.
        prefix = next((p for p in ("QUERY_RESULTS_", "QUESTION_RESULTS_") if input.startswith(p)), None)
        if prefix is None or not message.isData():
            return

        # name of the query/question this result belongs to
        name = input[len(prefix):]

        data = message.getData()

        if 'result' not in data:
            self.logger.info("nothing found")
            return

        # A duplicate or unexpected result must neither crash nor re-trigger the summary.
        if name not in getattr(self, "todos", ()):
            self.logger.info("ignoring unexpected result for: " + name)
            return

        self.todos.remove(name)
        self.results[name] = data['result']
        # one more step finished, so later progress reports move forward
        self.current_step += 1

        # all results received
        if not self.todos:
            input_data = worker.get_data("input") if worker else None
            if input_data is None:
                input_data = ""
            self.summarize_doc(properties=properties, input=input_data, worker=worker, progress_id=self.progress_id)

issue_nl_query(question, progress_id=None, name=None, worker=None, to_param_prefix='QUESTION_RESULTS_')

Issue a natural language question to the NL2SQL agent as part of the summarization process.

Parameters:

Name Type Description Default
question

The natural language question to ask.

required
progress_id

An optional progress identifier for tracking.

None
name

An optional name for the question.

None
worker

The worker handling the processing.

None
to_param_prefix

The prefix for the output parameter name.

'QUESTION_RESULTS_'

Returns:

Type Description

None

Source code in blue/agents/summarizer.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def issue_nl_query(self, question, progress_id=None, name=None, worker=None, to_param_prefix="QUESTION_RESULTS_"):
    """Issue a natural language question to the NL2SQL agent as part of the summarization process.

    Parameters:
        question: The natural language question to ask.
        progress_id: An optional progress identifier for tracking. Defaults to the worker's `sid`.
        name: The name of the question. It is appended to `to_param_prefix` to form the
            input on which the answer comes back, so it must be a string in practice.
        worker: The worker handling the processing. A new one is created when omitted.
        to_param_prefix: The prefix for the output parameter name.

    Returns:
        None
    """
    # Identity check: None is a singleton, and `==` could be hijacked by a custom __eq__.
    if worker is None:
        worker = self.create_worker(None)

    if progress_id is None:
        progress_id = worker.sid

    # progress
    worker.write_progress(progress_id=progress_id, label='Issuing question:' + question, value=self.current_step / self.num_steps)

    # plan: question -> NL2SQL -> this agent's "<to_param_prefix><name>" input
    plan = AgenticPlan(scope=worker.prefix)
    plan.define_input(name, value=question)
    plan.connect_input_to_agent(from_input=name, to_agent="NL2SQL")
    plan.connect_agent_to_agent(from_agent="NL2SQL", to_agent=self.name, to_agent_input=to_param_prefix + name)

    # submit plan
    plan.submit(worker)

issue_sql_query(query, progress_id=None, name=None, worker=None, to_param_prefix='QUERY_RESULTS_')

Issue a SQL query to the QueryExecutor agent as part of the summarization process.

Parameters:

Name Type Description Default
query

The SQL query to execute.

required
progress_id

An optional progress identifier for tracking.

None
name

An optional name for the query.

None
worker

The worker handling the processing.

None
to_param_prefix

The prefix for the output parameter name.

'QUERY_RESULTS_'
Source code in blue/agents/summarizer.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
def issue_sql_query(self, query, progress_id=None, name=None, worker=None, to_param_prefix="QUERY_RESULTS_"):
    """Issue a SQL query to the QueryExecutor agent as part of the summarization process.

    Parameters:
        query: The SQL query to execute.
        progress_id: An optional progress identifier for tracking. Defaults to the worker's `sid`.
        name: The name of the query. It is appended to `to_param_prefix` to form the
            input on which the result comes back, so it must be a string in practice.
        worker: The worker handling the processing. A new one is created when omitted.
        to_param_prefix: The prefix for the output parameter name.
    """
    # Identity check: None is a singleton, and `==` could be hijacked by a custom __eq__.
    if worker is None:
        worker = self.create_worker(None)

    if progress_id is None:
        progress_id = worker.sid

    # progress
    worker.write_progress(progress_id=progress_id, label='Issuing query:' + query, value=self.current_step / self.num_steps)

    # plan: query -> QUERYEXECUTOR -> this agent's "<to_param_prefix><name>" input
    plan = AgenticPlan(scope=worker.prefix)
    plan.define_input(name, value=query)
    plan.connect_input_to_agent(from_input=name, to_agent="QUERYEXECUTOR")
    plan.connect_agent_to_agent(from_agent="QUERYEXECUTOR", to_agent=self.name, to_agent_input=to_param_prefix + name)

    # submit plan
    plan.submit(worker)

summarize_doc(progress_id=None, properties=None, input='', worker=None)

Summarize the input document using the configured template and query results.

Parameters:

Name Type Description Default
progress_id

An optional progress identifier for tracking.

None
properties

Additional properties for processing.

None
input

The input text to summarize.

''
worker

The worker handling the processing.

None
Source code in blue/agents/summarizer.py
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
def summarize_doc(self, progress_id=None, properties=None, input="", worker=None):
    """Summarize the input document using the configured template and query results.

    Fills `properties['template']` from the collected results, the worker's session
    data and the input text, optionally rephrases it through `execute_api_call`,
    then writes the text followed by EOS and a final progress update.

    Parameters:
        progress_id: An optional progress identifier for tracking. Defaults to the worker's `sid`.
        properties: Additional properties for processing. Defaults to the agent's properties.
        input: The input text to summarize.
        worker: The worker handling the processing. A new one is created when omitted.
    """
    if worker is None:
        worker = self.create_worker(None)

    if progress_id is None:
        progress_id = worker.sid

    if properties is None:
        properties = self.properties

    # progress
    worker.write_progress(progress_id=progress_id, label='Summarizing doc...', value=self.current_step / self.num_steps)

    session_data = worker.get_all_session_data() or {}

    # Merge into one mapping first: unpacking several dicts straight into the call
    # raises TypeError as soon as two of them share a key (or one has an "input" key).
    values = {**self.results, **session_data, "input": input}
    summary = string_utils.safe_substitute(properties['template'], **values)

    if properties.get('rephrase'):
        # progress
        worker.write_progress(progress_id=progress_id, label='Rephrasing doc...', value=self.current_step / self.num_steps)

        session_data = self.session.get_all_data()

        #### call api to rephrase summary
        summary = self.execute_api_call(summary, properties=properties, additional_data=session_data)

    # Both paths end the same way: emit the text, then close the stream.
    worker.write_data(summary)
    worker.write_eos()

    # progress, done
    worker.write_progress(progress_id=progress_id, label='Done...', value=1.0)
Last update: 2025-10-09