<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xml:lang="ru"><front><journal-meta><journal-id journal-id-type="publisher-id">lingngu</journal-id><journal-title-group><journal-title xml:lang="ru">Вестник НГУ. Серия: Лингвистика и межкультурная коммуникация</journal-title><trans-title-group xml:lang="en"><trans-title>NSU Vestnik. Series: Linguistics and Intercultural Communication</trans-title></trans-title-group></journal-title-group><issn pub-type="ppub">1818-7935</issn><publisher><publisher-name>Новосибирский государственный университет</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="doi">10.25205/1818-7935-2025-23-1-80-92</article-id><article-id custom-type="elpub" pub-id-type="custom">lingngu-906</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="ru"><subject>КОМПЬЮТЕРНАЯ И ПРИКЛАДНАЯ ЛИНГВИСТИКА</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="en"><subject>COMPUTER AND APPLIED LINGUISTICS</subject></subj-group></article-categories><title-group><article-title>Автоматическая саммаризация родительских чатов в WhatsApp</article-title><trans-title-group xml:lang="en"><trans-title>Automatic Summarization of Parental Chats on WhatsApp</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid">https://orcid.org/0009-0001-9548-3273</contrib-id><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Дмитриева</surname><given-names>К. А.</given-names></name><name name-style="western" xml:lang="en"><surname>Dmitrieva</surname><given-names>K. A.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Дмитриева Кристина Александровна, стажер-исследователь</p><p>Санкт-Петербург</p></bio><bio xml:lang="en"><p>Kristina A. Dmitrieva, Research Assistant</p><p>Saint Petersburg</p></bio><email xlink:type="simple">kadmitrieva@hse.ru</email><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid">https://orcid.org/0009-0005-4124-1956</contrib-id><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Жолус</surname><given-names>М. Р.</given-names></name><name name-style="western" xml:lang="en"><surname>Zholus</surname><given-names>M. R.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Жолус Марина Романовна, стажер-исследователь, инженер-программист АО «Эврика»</p><p>Санкт-Петербург</p></bio><bio xml:lang="en"><p>Marina R. 
Zholus, Research Assistant, Software Engineer</p><p>Saint Petersburg</p></bio><email xlink:type="simple">mrzholus@edu.hse.ru</email><xref ref-type="aff" rid="aff-1"/></contrib></contrib-group><aff-alternatives id="aff-1"><aff xml:lang="ru">Национальный исследовательский университет «Высшая школа экономики»<country>Россия</country></aff><aff xml:lang="en">HSE University<country>Russian Federation</country></aff></aff-alternatives><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>04</day><month>07</month><year>2025</year></pub-date><volume>23</volume><issue>1</issue><fpage>80</fpage><lpage>92</lpage><permissions><copyright-statement>Copyright &#x00A9; Дмитриева К.А., Жолус М.Р., 2025</copyright-statement><copyright-year>2025</copyright-year><copyright-holder xml:lang="ru">Дмитриева К.А., Жолус М.Р.</copyright-holder><copyright-holder xml:lang="en">Dmitrieva K.A., Zholus M.R.</copyright-holder><license license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>This work is licensed under a Creative Commons Attribution 4.0 License.</license-p></license></permissions><self-uri xlink:href="https://lingngu.elpub.ru/jour/article/view/906">https://lingngu.elpub.ru/jour/article/view/906</self-uri><abstract><p>Автоматическая саммаризация текста – одна из ключевых задач NLP, предполагающая создание краткой версии исходного текста. В современном мире, где объемы потребляемой человеком информации неустанно растут, задаче саммаризации уделяется все больше внимания. Автореферирование предполагает два основных подхода: экстрактивный и абстрактивный. Последний заключается в автоматическом создании саммари текста, в котором могут содержаться слова и предложения, не встречающиеся в источнике. Этот подход зачастую требует использования нейросетевых моделей, и для его реализации необходимы большие наборы специальным образом размеченных данных. Несмотря на значительные успехи в абстрактивной саммаризации публицистических и научных текстов, методы и датасеты, используемые для работы с монологическими документами, не всегда применимы для саммаризации диалогов. Кроме того, хотя создано достаточно много англоязычных датасетов для саммаризации текстов различных доменов, существующие наборы данных для автоматического аннотирования текстов на русском языке пока немногочисленны. Настоящая статья посвящена разработке и описанию русскоязычного диалогового датасета для саммаризации сообщений в родительских чатах и последующему обучению модели абстрактивной саммаризации для русского языка на авторском наборе диалоговых данных. В качестве материала выступил родительский чат с учителем в мессенджере WhatsApp. Процесс ручной разметки датасета включал в себя разбиение всех сообщений чата на отдельные диалоги, создание саммари и присвоение тематических меток для каждого разговора. В результате был создан датасет, содержащий 616 диалогов, в общей сложности состоящих из 3380 сообщений. Для файн-тьюнинга были выбраны модели-трансформеры ruT5, mT5 и RuGPT (ruT5 и RuGPT были предварительно обучены на русскоязычном датасете для автоматической саммаризации новостей), а для оценки их качества – метрики ROUGE-1, ROUGE-2, ROUGE-L, BLEU и BERTScore. 
В результате модели ruT5, дообученной на авторском датасете, удалось превзойти бейзлайн по всем пяти метрикам.</p></abstract><trans-abstract xml:lang="en"><p>Automatic text summarization is one of the main tasks of natural language processing (NLP), which consists in creating a shorter version of the source text. In today’s world, the amount of information consumed by people is constantly increasing; therefore, more and more emphasis is being placed on the task of summarization. There are two main approaches to automatic text summarization: extractive and abstractive. The latter involves the automatic creation of a summary that may contain words and phrases not present in the source. This approach usually requires the use of neural network models, which creates a demand for large sets of specially labeled data. Despite significant advances in the summarization of scientific and news articles, the methods and datasets applied to monologue documents are not always suitable for dialogue summarization. Moreover, although a considerable number of English-language summarization datasets exist for various domains, the datasets available for Russian are still few. The paper is devoted to the development and description of a Russian-language dialogue dataset for summarizing group chat messages and to the subsequent fine-tuning of abstractive summarization models for Russian on this custom dataset. A parental chat with a teacher on WhatsApp was used as the material for the dataset. Manual labeling of the dataset consisted in dividing the entire group chat into separate dialogues, writing a summary, and adding topic labels for each dialogue. As a result, a dataset was created that includes 616 dialogues with a total of 3380 messages. The ruT5, mT5, and RuGPT transformer models were selected for fine-tuning; the ruT5 and RuGPT models had been pre-trained on a Russian-language dataset for automatic news summarization. The ROUGE-1, ROUGE-2, ROUGE-L, BLEU, and BERTScore metrics were used to evaluate the quality of the models. Ultimately, the ruT5 model fine-tuned on the custom dataset outperformed the baseline model on all five metrics.</p></trans-abstract><kwd-group xml:lang="ru"><kwd>автоматическая саммаризация текста</kwd><kwd>диалоговая саммаризация</kwd><kwd>машинное обучение</kwd><kwd>трансформеры</kwd><kwd>обработка естественного языка</kwd></kwd-group><kwd-group xml:lang="en"><kwd>automatic text summarization</kwd><kwd>dialogue summarization</kwd><kwd>machine learning</kwd><kwd>transformers</kwd><kwd>dataset</kwd><kwd>NLP</kwd></kwd-group><funding-group xml:lang="ru"><funding-statement>Исследование подготовлено по материалам проекта «Текст как Big Data: методы и модели работы с большими текстовыми данными», выполняемого в рамках Программы фундаментальных исследований НИУ ВШЭ в 2024 году.</funding-statement></funding-group><funding-group xml:lang="en"><funding-statement>The article is based on the materials of the project “Text as Big Data: methods and models of working with large text data”, carried out within the framework of the HSE Fundamental Research Program in 2024.</funding-statement></funding-group></article-meta></front><back><ref-list><title>References</title><ref id="cit1"><label>1</label><citation-alternatives><mixed-citation xml:lang="ru">An C., Zhong M., Chen Y., Wang D., Qiu X., Huang X. Enhancing Scientific Papers Summarization with Citation Graph. Proceedings of the AAAI Conference on Artificial Intelligence, 2021, vol. 35(14), pp. 12498–12506. 
https://doi.org/10.1609/aaai.v35i14.17482</mixed-citation><mixed-citation xml:lang="en">An C., Zhong M., Chen Y., Wang D., Qiu X., Huang X. Enhancing Scientific Papers Summarization with Citation Graph. Proceedings of the AAAI Conference on Artificial Intelligence, 2021, vol. 35(14), pp. 12498–12506. https://doi.org/10.1609/aaai.v35i14.17482</mixed-citation></citation-alternatives></ref><ref id="cit2"><label>2</label><citation-alternatives><mixed-citation xml:lang="ru">Ben Abacha A., Yim W., Fan Y., Lin T. An Empirical Study of Clinical Note Generation from Doctor-Patient Encounters. Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics, 2023, pp. 2291–2302. Available at: https://aclanthology.org/2023.eacl-main.168.pdf (аccessed: June 23, 2024).</mixed-citation><mixed-citation xml:lang="en">Ben Abacha A., Yim W., Fan Y., Lin T. An Empirical Study of Clinical Note Generation from Doctor-Patient Encounters. Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics, 2023, pp. 2291–2302. Available at: https://aclanthology.org/2023.eacl-main.168.pdf (аccessed: June 23, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit3"><label>3</label><citation-alternatives><mixed-citation xml:lang="ru">Budzianowski P., Wen T., Tseng B.H., Casanueva I., Ultes S., Ramadan O., et al. MultiWOZ – A Large-Scale Multi-Domain Wizard-of-Oz Dataset for Task-Oriented Dialogue Modelling. Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, 2018, pp. 5016–5026. https://doi.org/10.18653/v1/d18-1547</mixed-citation><mixed-citation xml:lang="en">Budzianowski P., Wen T., Tseng B.H., Casanueva I., Ultes S., Ramadan O., et al. MultiWOZ – A Large-Scale Multi-Domain Wizard-of-Oz Dataset for Task-Oriented Dialogue Modelling. Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, 2018, pp. 5016–5026. https://doi.org/10.18653/v1/d18-1547</mixed-citation></citation-alternatives></ref><ref id="cit4"><label>4</label><citation-alternatives><mixed-citation xml:lang="ru">Bylieva D., Lobatyuk V., Novikov M. Parent Chats in Education System: During and after the Pandemic Outbreak. Education Sciences, 2023, vol. 13(8), pp. 778–794. https://doi.org/10.3390/educsci13080778</mixed-citation><mixed-citation xml:lang="en">Bylieva D., Lobatyuk V., Novikov M. Parent Chats in Education System: During and after the Pandemic Outbreak. Education Sciences, 2023, vol. 13(8), pp. 778–794. https://doi.org/10.3390/educsci13080778</mixed-citation></citation-alternatives></ref><ref id="cit5"><label>5</label><citation-alternatives><mixed-citation xml:lang="ru">Carletta J., Ashby S., Bourban S., Flynn M., Guillemot M., Hain T., et al. The AMI Meeting Corpus: A Pre-announcement. Lecture Notes in Computer Science, 2006, pp. 28–39. https://doi.org/10.1007/11677482_3</mixed-citation><mixed-citation xml:lang="en">Carletta J., Ashby S., Bourban S., Flynn M., Guillemot M., Hain T., et al. The AMI Meeting Corpus: A Pre-announcement. Lecture Notes in Computer Science, 2006, pp. 28–39. https://doi.org/10.1007/11677482_3</mixed-citation></citation-alternatives></ref><ref id="cit6"><label>6</label><citation-alternatives><mixed-citation xml:lang="ru">Chen Y., Liu Y., Chen L., Zhang Y. DialogSum: A Real-Life Scenario Dialogue Summarization Dataset. Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021, 2021, pp. 5062–5074. 
Available at: https://aclanthology.org/2021.findings-acl.449.pdf (аccessed: June 24, 2024).</mixed-citation><mixed-citation xml:lang="en">Chen Y., Liu Y., Chen L., Zhang Y. DialogSum: A Real-Life Scenario Dialogue Summarization Dataset. Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021, 2021, pp. 5062–5074. Available at: https://aclanthology.org/2021.findings-acl.449.pdf (аccessed: June 24, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit7"><label>7</label><citation-alternatives><mixed-citation xml:lang="ru">Chowdhury S.B.R., Monath N., Dubey A., Zaheer M., McCallum A., Ahmed A., Chaturvedi S. Incremental Extractive Opinion Summarization Using Cover Trees. arXiv (Cornell University). 2024. Available at: https://arxiv.org/abs/2401.08047 (аccessed: June 24, 2024).</mixed-citation><mixed-citation xml:lang="en">Chowdhury S.B.R., Monath N., Dubey A., Zaheer M., McCallum A., Ahmed A., Chaturvedi S. Incremental Extractive Opinion Summarization Using Cover Trees. arXiv (Cornell University). 2024. Available at: https://arxiv.org/abs/2401.08047 (аccessed: June 24, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit8"><label>8</label><citation-alternatives><mixed-citation xml:lang="ru">Cohan A., Dernoncourt F., Kim D. S., Bui T., Kim S., Chang W., Goharian N. A Discourse-Aware Attention Model for Abstractive Summarization of Long Documents. Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2018, vol. 2, pp. 615–621. https://doi.org/10.18653/v1/n18-2097</mixed-citation><mixed-citation xml:lang="en">Cohan A., Dernoncourt F., Kim D. S., Bui T., Kim S., Chang W., Goharian N. A Discourse-Aware Attention Model for Abstractive Summarization of Long Documents. Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2018, vol. 2, pp. 615–621. https://doi.org/10.18653/v1/n18-2097</mixed-citation></citation-alternatives></ref><ref id="cit9"><label>9</label><citation-alternatives><mixed-citation xml:lang="ru">Dutta S., Chandra V., Mehra K., Ghatak S., Das A. K., Ghosh S. Summarizing Microblogs during Emergency Events: A Comparison of Extractive Summarization Algorithms. International Conference on Emerging Technologies in Data Mining and Information Security, 2018. Available at: https://www.researchgate.net/publication/325593717_Summarizing_Microblogs_during_Emergency_Events_A_Comparison_of_Extractive_Summarization_Algorithms (аccessed: June 25, 2024).</mixed-citation><mixed-citation xml:lang="en">Dutta S., Chandra V., Mehra K., Ghatak S., Das A. K., Ghosh S. Summarizing Microblogs during Emergency Events: A Comparison of Extractive Summarization Algorithms. International Conference on Emerging Technologies in Data Mining and Information Security, 2018. Available at: https://www.researchgate.net/publication/325593717_Summarizing_Microblogs_during_Emergency_Events_A_Comparison_of_Extractive_Summarization_Algorithms (аccessed: June 25, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit10"><label>10</label><citation-alternatives><mixed-citation xml:lang="ru">Feigenblat G., Gunasekara R. C., Sznajder B., Joshi S., Konopnicki D., Aharonov R. TWEET-SUMM A Dialog Summarization Dataset for Customer Service. Findings of the Association for Computational Linguistics: EMNLP 2021, 2021, pp. 245–260. 
https://doi.org/10.18653/v1/2021.findings-emnlp.24</mixed-citation><mixed-citation xml:lang="en">Feigenblat G., Gunasekara R. C., Sznajder B., Joshi S., Konopnicki D., Aharonov R. TWEET-SUMM A Dialog Summarization Dataset for Customer Service. Findings of the Association for Computational Linguistics: EMNLP 2021, 2021, pp. 245–260. https://doi.org/10.18653/v1/2021.findings-emnlp.24</mixed-citation></citation-alternatives></ref><ref id="cit11"><label>11</label><citation-alternatives><mixed-citation xml:lang="ru">Feng X., Feng X., Qin B. A Survey on Dialogue Summarization: Recent Advances and New Frontiers. Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence, 2022, pp. 5453–5460. https://doi.org/10.24963/ijcai.2022/764</mixed-citation><mixed-citation xml:lang="en">Feng X., Feng X., Qin B. A Survey on Dialogue Summarization: Recent Advances and New Frontiers. Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence, 2022, pp. 5453–5460. https://doi.org/10.24963/ijcai.2022/764</mixed-citation></citation-alternatives></ref><ref id="cit12"><label>12</label><citation-alternatives><mixed-citation xml:lang="ru">Ghosh A., Acharya A., Jha P., Gaudgaul A., Majumdar R., Saha S., Chadha A., Jain R., Sinha S., Agarwal S. MedSUMM: A Multimodal Approach to Summarizing Code-Mixed Hindi-English clinical queries. arXiv (Cornell University). 2024. Available at: https://arxiv.org/abs/2401.01596 (аccessed: June 24, 2024).</mixed-citation><mixed-citation xml:lang="en">Ghosh A., Acharya A., Jha P., Gaudgaul A., Majumdar R., Saha S., Chadha A., Jain R., Sinha S., Agarwal S. MedSUMM: A Multimodal Approach to Summarizing Code-Mixed Hindi-English clinical queries. arXiv (Cornell University). 2024. Available at: https://arxiv.org/abs/2401.01596 (аccessed: June 24, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit13"><label>13</label><citation-alternatives><mixed-citation xml:lang="ru">Gliwa B., Mochol I., Biesek M., Wawer A. SAMSum Corpus: A Human-annotated Dialogue Dataset for Abstractive Summarization. Proceedings of the 2nd Workshop on New Frontiers in Summarization, 2019, pp. 70–79. https://doi.org/10.18653/v1/d19-5409</mixed-citation><mixed-citation xml:lang="en">Gliwa B., Mochol I., Biesek M., Wawer A. SAMSum Corpus: A Human-annotated Dialogue Dataset for Abstractive Summarization. Proceedings of the 2nd Workshop on New Frontiers in Summarization, 2019, pp. 70–79. https://doi.org/10.18653/v1/d19-5409</mixed-citation></citation-alternatives></ref><ref id="cit14"><label>14</label><citation-alternatives><mixed-citation xml:lang="ru">Gusev I. Dataset for Automatic Summarization of Russian News. In: Communications in computer and information science, 2020, pp. 122–134. https://doi.org/10.1007/978-3-030-59082-6_9</mixed-citation><mixed-citation xml:lang="en">Gusev I. Dataset for Automatic Summarization of Russian News. In: Communications in computer and information science, 2020, pp. 122–134. https://doi.org/10.1007/978-3-030-59082-6_9</mixed-citation></citation-alternatives></ref><ref id="cit15"><label>15</label><citation-alternatives><mixed-citation xml:lang="ru">Hasan T., Bhattacharjee A., Islam Md. S., Mubasshir K., Li Y., Kang Y. B., Rahman S., Shahriyar R. XL-Sum: Large-Scale Multilingual Abstractive Summarization for 44 Languages. Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021, 2021, pp. 4693–703. 
https://doi.org/10.18653/v1/2021.findings-acl.413</mixed-citation><mixed-citation xml:lang="en">Hasan T., Bhattacharjee A., Islam Md. S., Mubasshir K., Li Y., Kang Y. B., Rahman S., Shahriyar R. XL-Sum: Large-Scale Multilingual Abstractive Summarization for 44 Languages. Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021, 2021, pp. 4693–703. https://doi.org/10.18653/v1/2021.findings-acl.413</mixed-citation></citation-alternatives></ref><ref id="cit16"><label>16</label><citation-alternatives><mixed-citation xml:lang="ru">Hermann K. M., Kočiský T., Grefenstette E., Espeholt L., Kay W., Suleyman M., Blunsom P. Teaching Machines to Read and Comprehend. Neural Information Processing Systems, 2015, vol. 28, pp. 1693–1701. Available at: http://papers.nips.cc/paper/5945-teaching-machines-to-read-and-comprehend.pdf (аccessed: June 25, 2024).</mixed-citation><mixed-citation xml:lang="en">Hermann K. M., Kočiský T., Grefenstette E., Espeholt L., Kay W., Suleyman M., Blunsom P. Teaching Machines to Read and Comprehend. Neural Information Processing Systems, 2015, vol. 28, pp. 1693–1701. Available at: http://papers.nips.cc/paper/5945-teaching-machines-to-read-and-comprehend.pdf (аccessed: June 25, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit17"><label>17</label><citation-alternatives><mixed-citation xml:lang="ru">Janin A., Baron D., Edwards J. A., Ellis D. P. W., Gelbart D., Morgan N., et al. The ICSI meeting corpus. 2003 IEEE International Conference on Acoustics, Speech, and Signal Processing, 2003 Proceedings (ICASSP ’03). 2003. Available at: https://www.researchgate.net/publication/4015071_The_ICSI_meeting_corpus (аccessed: June 23, 2024).</mixed-citation><mixed-citation xml:lang="en">Janin A., Baron D., Edwards J. A., Ellis D. P. W., Gelbart D., Morgan N., et al. The ICSI meeting corpus. 2003 IEEE International Conference on Acoustics, Speech, and Signal Processing, 2003 Proceedings (ICASSP ’03). 2003. Available at: https://www.researchgate.net/publication/4015071_The_ICSI_meeting_corpus (аccessed: June 23, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit18"><label>18</label><citation-alternatives><mixed-citation xml:lang="ru">Jin H., Yang Z., Meng D., Wang J., Tan J. A Comprehensive Survey on Process-Oriented Automatic Text Summarization with Exploration of LLM-Based Methods. arXiv (Cornell University). 2024. Available at: https://arxiv.org/abs/2403.02901 (аccessed: June 24, 2024).</mixed-citation><mixed-citation xml:lang="en">Jin H., Yang Z., Meng D., Wang J., Tan J. A Comprehensive Survey on Process-Oriented Automatic Text Summarization with Exploration of LLM-Based Methods. arXiv (Cornell University). 2024. Available at: https://arxiv.org/abs/2403.02901 (аccessed: June 24, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit19"><label>19</label><citation-alternatives><mixed-citation xml:lang="ru">Khalman M., Zhao Y., Saleh M. ForumSum: A Multi-Speaker Conversation Summarization Dataset. Findings of the Association for Computational Linguistics: EMNLP 2021, 2021, pp. 4592– 4599. https://doi.org/10.18653/v1/2021.findings-emnlp.391</mixed-citation><mixed-citation xml:lang="en">Khalman M., Zhao Y., Saleh M. ForumSum: A Multi-Speaker Conversation Summarization Dataset. Findings of the Association for Computational Linguistics: EMNLP 2021, 2021, pp. 4592– 4599. 
https://doi.org/10.18653/v1/2021.findings-emnlp.391</mixed-citation></citation-alternatives></ref><ref id="cit20"><label>20</label><citation-alternatives><mixed-citation xml:lang="ru">Koupaee M., Wang W. Y. WikiHow: A Large Scale Text Summarization Dataset. arXiv (Cornell University). 2018. Available at: https://arxiv.org/pdf/1810.09305 (аccessed: June 24, 2024).</mixed-citation><mixed-citation xml:lang="en">Koupaee M., Wang W. Y. WikiHow: A Large Scale Text Summarization Dataset. arXiv (Cornell University). 2018. Available at: https://arxiv.org/pdf/1810.09305 (аccessed: June 24, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit21"><label>21</label><citation-alternatives><mixed-citation xml:lang="ru">Lin C. ROUGE: A Package for Automatic Evaluation of Summaries. Text Summarization Branches Out. 2004. Available at: https://aclanthology.org/W04-1013.pdf (аccessed: June 27, 2024).</mixed-citation><mixed-citation xml:lang="en">Lin C. ROUGE: A Package for Automatic Evaluation of Summaries. Text Summarization Branches Out. 2004. Available at: https://aclanthology.org/W04-1013.pdf (аccessed: June 27, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit22"><label>22</label><citation-alternatives><mixed-citation xml:lang="ru">Liu C., Wang P., Xu J., Zang L., Ye J. Automatic Dialogue Summary Generation for Customer Service. KDD ’19: Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining. 2019, pp. 1957–1965. https://doi.org/10.1145/3292500.3330683</mixed-citation><mixed-citation xml:lang="en">Liu C., Wang P., Xu J., Zang L., Ye J. Automatic Dialogue Summary Generation for Customer Service. KDD ’19: Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining. 2019, pp. 1957–1965. https://doi.org/10.1145/3292500.3330683</mixed-citation></citation-alternatives></ref><ref id="cit23"><label>23</label><citation-alternatives><mixed-citation xml:lang="ru">Liu L., Lu Y., Yang M., Qu Q., Zhu J., Li H. Generative Adversarial Network for Abstractive Text Summarization. Proceedings of the AAAI Conference on Artificial Intelligence, 2018, vol. 32(1). Available at https://arxiv.org/abs/1711.09357 (аccessed: June 25, 2024).</mixed-citation><mixed-citation xml:lang="en">Liu L., Lu Y., Yang M., Qu Q., Zhu J., Li H. Generative Adversarial Network for Abstractive Text Summarization. Proceedings of the AAAI Conference on Artificial Intelligence, 2018, vol. 32(1). Available at https://arxiv.org/abs/1711.09357 (аccessed: June 25, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit24"><label>24</label><citation-alternatives><mixed-citation xml:lang="ru">Liu Y. Fine-tune BERT for Extractive Summarization. arXiv (Cornell University). 2019. Available at: https://arxiv.org/pdf/1903.10318.pdf (аccessed: June 23, 2024).</mixed-citation><mixed-citation xml:lang="en">Liu Y. Fine-tune BERT for Extractive Summarization. arXiv (Cornell University). 2019. Available at: https://arxiv.org/pdf/1903.10318.pdf (аccessed: June 23, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit25"><label>25</label><citation-alternatives><mixed-citation xml:lang="ru">Luhn H. P. The Automatic Creation of Literature Abstracts. IBM Journal of Research and Development, 1958, vol. 2(2), pp. 159–165. https://doi.org/10.1147/rd.22.0159</mixed-citation><mixed-citation xml:lang="en">Luhn H. P. The Automatic Creation of Literature Abstracts. IBM Journal of Research and Development, 1958, vol. 2(2), pp. 159–165. 
https://doi.org/10.1147/rd.22.0159</mixed-citation></citation-alternatives></ref><ref id="cit26"><label>26</label><citation-alternatives><mixed-citation xml:lang="ru">Lyu M. R., Cheng P., Li X., Balian P., Bian J., Wu Y. Automatic Summarization of Doctor-Patient Encounter Dialogues Using Large Language Model through Prompt Tuning. arXiv (Cornell University). 2024. Available at: https://arxiv.org/abs/2403.13089 (аccessed: June 24, 2024).</mixed-citation><mixed-citation xml:lang="en">Lyu M. R., Cheng P., Li X., Balian P., Bian J., Wu Y. Automatic Summarization of Doctor-Patient Encounter Dialogues Using Large Language Model through Prompt Tuning. arXiv (Cornell University). 2024. Available at: https://arxiv.org/abs/2403.13089 (аccessed: June 24, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit27"><label>27</label><citation-alternatives><mixed-citation xml:lang="ru">Malykh V., Chernis K., Artemova E., Piontkovskaya I. SumTitles: a Summarization Dataset with Low Extractiveness. Proceedings of the 28th International Conference on Computational Linguistics, 2020, pp. 5718–5730. https://doi.org/10.18653/v1/2020.coling-main.503</mixed-citation><mixed-citation xml:lang="en">Malykh V., Chernis K., Artemova E., Piontkovskaya I. SumTitles: a Summarization Dataset with Low Extractiveness. Proceedings of the 28th International Conference on Computational Linguistics, 2020, pp. 5718–5730. https://doi.org/10.18653/v1/2020.coling-main.503</mixed-citation></citation-alternatives></ref><ref id="cit28"><label>28</label><citation-alternatives><mixed-citation xml:lang="ru">Moratanch N., Gopalan С. A survey on Extractive Text Summarization. 2017 International Conference on Computer, Communication and Signal Processing (ICCCSP). 2017. Available at: https://ieeexplore.ieee.org/document/7944061 (аccessed: June 25, 2024).</mixed-citation><mixed-citation xml:lang="en">Moratanch N., Gopalan С. A survey on Extractive Text Summarization. 2017 International Conference on Computer, Communication and Signal Processing (ICCCSP). 2017. Available at: https://ieeexplore.ieee.org/document/7944061 (аccessed: June 25, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit29"><label>29</label><citation-alternatives><mixed-citation xml:lang="ru">Napoles C., Gormley M. R., Van Durme B. Annotated Gigaword. Proceedings of the Joint Workshop on Automatic Knowledge Base Construction and Web-scale Knowledge Extraction (AKBC-WEKEX), 2012, pp. 95–100. Available at: https://aclanthology.org/W12-3018.pdf (аccessed: June 23, 2024).</mixed-citation><mixed-citation xml:lang="en">Napoles C., Gormley M. R., Van Durme B. Annotated Gigaword. Proceedings of the Joint Workshop on Automatic Knowledge Base Construction and Web-scale Knowledge Extraction (AKBC-WEKEX), 2012, pp. 95–100. Available at: https://aclanthology.org/W12-3018.pdf (аccessed: June 23, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit30"><label>30</label><citation-alternatives><mixed-citation xml:lang="ru">Narayan S., Cohen S. B., Lapata M. Don’t Give Me the Details, Just the Summary! Topic-Aware Convolutional Neural Networks for Extreme Summarization. Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, 2018, pp. 1797–1807. https://doi.org/10.18653/v1/d18-1206</mixed-citation><mixed-citation xml:lang="en">Narayan S., Cohen S. B., Lapata M. Don’t Give Me the Details, Just the Summary! Topic-Aware Convolutional Neural Networks for Extreme Summarization. 
Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, 2018, pp. 1797–1807. https://doi.org/10.18653/v1/d18-1206</mixed-citation></citation-alternatives></ref><ref id="cit31"><label>31</label><citation-alternatives><mixed-citation xml:lang="ru">Nedoluzhko A., Singh M., Hledíková M., Ghosal T., Bojar O. ELITR Minuting Corpus: A Novel Dataset for Automatic Minuting from Multi-Party Meetings in English and Czech. Proceedings of the Thirteenth Language Resources and Evaluation Conference, 2022, pp. 3174–3182. Available at: https://aclanthology.org/2022.lrec-1.340/ (аccessed: June 23, 2024).</mixed-citation><mixed-citation xml:lang="en">Nedoluzhko A., Singh M., Hledíková M., Ghosal T., Bojar O. ELITR Minuting Corpus: A Novel Dataset for Automatic Minuting from Multi-Party Meetings in English and Czech. Proceedings of the Thirteenth Language Resources and Evaluation Conference, 2022, pp. 3174–3182. Available at: https://aclanthology.org/2022.lrec-1.340/ (аccessed: June 23, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit32"><label>32</label><citation-alternatives><mixed-citation xml:lang="ru">Papineni K., Roukos S., Ward T., Zhu W. BLEU: a Method for Automatic Evaluation of Machine Translation. Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics (ACL), 2002, pp. 311–318. Available at: https://aclanthology.org/P02-1040.pdf (аccessed: June 27, 2024).</mixed-citation><mixed-citation xml:lang="en">Papineni K., Roukos S., Ward T., Zhu W. BLEU: a Method for Automatic Evaluation of Machine Translation. Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics (ACL), 2002, pp. 311–318. Available at: https://aclanthology.org/P02-1040.pdf (аccessed: June 27, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit33"><label>33</label><citation-alternatives><mixed-citation xml:lang="ru">Rameshkumar R., Bailey P. Storytelling with Dialogue: A Critical Role Dungeons and Dragons Dataset. Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, 2020, pp. 5121–34. https://doi.org/10.18653/v1/2020.acl-main.459</mixed-citation><mixed-citation xml:lang="en">Rameshkumar R., Bailey P. Storytelling with Dialogue: A Critical Role Dungeons and Dragons Dataset. Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, 2020, pp. 5121–34. https://doi.org/10.18653/v1/2020.acl-main.459</mixed-citation></citation-alternatives></ref><ref id="cit34"><label>34</label><citation-alternatives><mixed-citation xml:lang="ru">Shukla A., Bhattacharya P., Poddar S., Mukherjee R., Ghosh K., Goyal P., Ghosh S. Legal Case Document Summarization: Extractive and Abstractive Methods and their Evaluation. arXiv (Cornell University), 2022. Available at: https://arxiv.org/abs/2210.07544 (аccessed: June 22, 2024).</mixed-citation><mixed-citation xml:lang="en">Shukla A., Bhattacharya P., Poddar S., Mukherjee R., Ghosh K., Goyal P., Ghosh S. Legal Case Document Summarization: Extractive and Abstractive Methods and their Evaluation. arXiv (Cornell University), 2022. Available at: https://arxiv.org/abs/2210.07544 (аccessed: June 22, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit35"><label>35</label><citation-alternatives><mixed-citation xml:lang="ru">Zhang S., Çelikyılmaz A., Gao J., Bansal M. EmailSum: Abstractive Email Thread Summarization. 
Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing, 2021, vol. 1, pp. 6895–6909. https://doi.org/10.18653/v1/2021.acl-long.537</mixed-citation><mixed-citation xml:lang="en">Zhang S., Çelikyılmaz A., Gao J., Bansal M. EmailSum: Abstractive Email Thread Summarization. Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing, 2021, vol. 1, pp. 6895–6909. https://doi.org/10.18653/v1/2021.acl-long.537</mixed-citation></citation-alternatives></ref><ref id="cit36"><label>36</label><citation-alternatives><mixed-citation xml:lang="ru">Zhang T., Kishore V., Wu F., Weinberger K. Q., Artzi Y. BERTScore: Evaluating Text Generation with BERT. arXiv (Cornell University), 2020. Available at: https://arxiv.org/pdf/1904.09675 (аccessed: June 27, 2024).</mixed-citation><mixed-citation xml:lang="en">Zhang T., Kishore V., Wu F., Weinberger K. Q., Artzi Y. BERTScore: Evaluating Text Generation with BERT. arXiv (Cornell University), 2020. Available at: https://arxiv.org/pdf/1904.09675 (аccessed: June 27, 2024).</mixed-citation></citation-alternatives></ref><ref id="cit37"><label>37</label><citation-alternatives><mixed-citation xml:lang="ru">Zhong M., Yin D., Yu T., Zaidi A., Mutuma M., Jha R., et al. QMSum: A New Benchmark for Query-based Multi-domain Meeting Summarization. Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2021, pp. 5905–5921. https://doi.org/10.18653/v1/2021.naacl-main.472</mixed-citation><mixed-citation xml:lang="en">Zhong M., Yin D., Yu T., Zaidi A., Mutuma M., Jha R., et al. QMSum: A New Benchmark for Query-based Multi-domain Meeting Summarization. Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2021, pp. 5905–5921. https://doi.org/10.18653/v1/2021.naacl-main.472</mixed-citation></citation-alternatives></ref><ref id="cit38"><label>38</label><citation-alternatives><mixed-citation xml:lang="ru">Zhu C., Liu Y., Mei J., Zeng M. MediaSum: A Large-scale Media Interview Dataset for Dialogue Summarization. Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2021, pp. 5927–5934. https://doi.org/10.18653/v1/2021.naacl-main.474</mixed-citation><mixed-citation xml:lang="en">Zhu C., Liu Y., Mei J., Zeng M. MediaSum: A Large-scale Media Interview Dataset for Dialogue Summarization. Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2021, pp. 5927–5934. https://doi.org/10.18653/v1/2021.naacl-main.474</mixed-citation></citation-alternatives></ref></ref-list><fn-group><fn fn-type="conflict"><p>The authors declare that there are no conflicts of interest present.</p></fn></fn-group></back></article>
