<?xml version="1.0" encoding="ISO-8859-1"?><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<front>
<journal-meta>
<journal-id>1405-5546</journal-id>
<journal-title><![CDATA[Computación y Sistemas]]></journal-title>
<abbrev-journal-title><![CDATA[Comp. y Sist.]]></abbrev-journal-title>
<issn>1405-5546</issn>
<publisher>
<publisher-name><![CDATA[Instituto Politécnico Nacional, Centro de Investigación en Computación]]></publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id>S1405-55462019000300649</article-id>
<article-id pub-id-type="doi">10.13053/cys-23-3-3256</article-id>
<title-group>
<article-title xml:lang="en"><![CDATA[Central Embeddings for Extractive Summarization Based on Similarity]]></article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname><![CDATA[Gutiérrez-Hinojosa]]></surname>
<given-names><![CDATA[Sandra J.]]></given-names>
</name>
<xref ref-type="aff" rid="Af1"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname><![CDATA[Calvo]]></surname>
<given-names><![CDATA[Hiram]]></given-names>
</name>
<xref ref-type="aff" rid="Af1"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname><![CDATA[Moreno-Armendáriz]]></surname>
<given-names><![CDATA[Marco A.]]></given-names>
</name>
<xref ref-type="aff" rid="Af1"/>
</contrib>
</contrib-group>
<aff id="Af1">
<institution><![CDATA[Instituto Politécnico Nacional, Centro de Investigación en Computación]]></institution>
<addr-line><![CDATA[Mexico City ]]></addr-line>
<country>Mexico</country>
</aff>
<pub-date pub-type="pub">
<day>00</day>
<month>09</month>
<year>2019</year>
</pub-date>
<pub-date pub-type="epub">
<day>00</day>
<month>09</month>
<year>2019</year>
</pub-date>
<volume>23</volume>
<numero>3</numero>
<fpage>649</fpage>
<lpage>663</lpage>
<copyright-statement/>
<copyright-year/>
<self-uri xlink:href="http://www.scielo.org.mx/scielo.php?script=sci_arttext&amp;pid=S1405-55462019000300649&amp;lng=en&amp;nrm=iso"></self-uri><self-uri xlink:href="http://www.scielo.org.mx/scielo.php?script=sci_abstract&amp;pid=S1405-55462019000300649&amp;lng=en&amp;nrm=iso"></self-uri><self-uri xlink:href="http://www.scielo.org.mx/scielo.php?script=sci_pdf&amp;pid=S1405-55462019000300649&amp;lng=en&amp;nrm=iso"></self-uri><abstract abstract-type="short" xml:lang="en"><p><![CDATA[In this work we propose using word embeddings combined with unsupervised methods such as clustering for the multi-document summarization task of DUC (Document Understanding Conference) 2002. We aim to find evidence that semantic information is kept in word embeddings and this representation is subject to be grouped based on their similarity, so that main ideas can be identified in sets of documents. We experiment with different clustering methods to extract candidates for the multi-document summarization task. Our experiments show that our method is able to find the prevalent ideas. ROUGE measures of our experiments are similar to the state of the art, despite the fact that not all the main ideas are found; as our method does not require annotated resources, it provides a domain and language independent way to create a summary.]]></p></abstract>
<kwd-group>
<kwd lng="en"><![CDATA[Extractive summarization]]></kwd>
<kwd lng="en"><![CDATA[prevalent ideas extraction]]></kwd>
<kwd lng="en"><![CDATA[concept similarity]]></kwd>
<kwd lng="en"><![CDATA[central embeddings]]></kwd>
<kwd lng="en"><![CDATA[DUC 2002]]></kwd>
</kwd-group>
</article-meta>
</front><back>
<ref-list>
<ref id="B1">
<label>1</label><nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Bengio]]></surname>
<given-names><![CDATA[Y.]]></given-names>
</name>
<name>
<surname><![CDATA[Ducharme]]></surname>
<given-names><![CDATA[R.]]></given-names>
</name>
<name>
<surname><![CDATA[Vincent]]></surname>
<given-names><![CDATA[P.]]></given-names>
</name>
<name>
<surname><![CDATA[Jauvin]]></surname>
<given-names><![CDATA[C.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[A neural probabilistic language model]]></article-title>
<source><![CDATA[Journal of machine learning research]]></source>
<year>2003</year>
<volume>3</volume>
<page-range>1137-55</page-range></nlm-citation>
</ref>
<ref id="B2">
<label>2</label><nlm-citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Collobert]]></surname>
<given-names><![CDATA[R.]]></given-names>
</name>
<name>
<surname><![CDATA[Weston]]></surname>
<given-names><![CDATA[J.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[A unified architecture for natural language processing: Deep neural networks with multitask learning]]></article-title>
<source><![CDATA[Proceedings of the 25th international conference on Machine learning]]></source>
<year>2008</year>
<page-range>160-7</page-range><publisher-name><![CDATA[ACM]]></publisher-name>
</nlm-citation>
</ref>
<ref id="B3">
<label>3</label><nlm-citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Fiori]]></surname>
<given-names><![CDATA[A.]]></given-names>
</name>
</person-group>
<source><![CDATA[Innovative document summarization techniques: Revolutionizing knowledge understanding]]></source>
<year>2014</year>
<publisher-loc><![CDATA[Philadelphia ]]></publisher-loc>
<publisher-name><![CDATA[IGI Global]]></publisher-name>
</nlm-citation>
</ref>
<ref id="B4">
<label>4</label><nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Gambhir]]></surname>
<given-names><![CDATA[M.]]></given-names>
</name>
<name>
<surname><![CDATA[Gupta]]></surname>
<given-names><![CDATA[V.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Recent automatic text summarization techniques: a survey]]></article-title>
<source><![CDATA[Artificial Intelligence Review]]></source>
<year>2017</year>
<numero>1</numero>
<issue>1</issue>
<page-range>1-66</page-range></nlm-citation>
</ref>
<ref id="B5">
<label>5</label><nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Gomaa]]></surname>
<given-names><![CDATA[W. H.]]></given-names>
</name>
<name>
<surname><![CDATA[Fahmy]]></surname>
<given-names><![CDATA[A. A.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[A survey of text similarity approaches]]></article-title>
<source><![CDATA[International Journal of Computer Applications]]></source>
<year>2013</year>
<volume>68</volume>
<numero>13</numero>
<issue>13</issue>
</nlm-citation>
</ref>
<ref id="B6">
<label>6</label><nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Gupta]]></surname>
<given-names><![CDATA[V.]]></given-names>
</name>
<name>
<surname><![CDATA[Lehal]]></surname>
<given-names><![CDATA[G. S.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[A survey of text summarization extractive techniques]]></article-title>
<source><![CDATA[Journal of emerging technologies in web intelligence]]></source>
<year>2010</year>
<volume>2</volume>
<numero>3</numero>
<issue>3</issue>
<page-range>258-68</page-range></nlm-citation>
</ref>
<ref id="B7">
<label>7</label><nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[John]]></surname>
<given-names><![CDATA[A.]]></given-names>
</name>
<name>
<surname><![CDATA[Premjith]]></surname>
<given-names><![CDATA[P.]]></given-names>
</name>
<name>
<surname><![CDATA[Wilscy]]></surname>
<given-names><![CDATA[M.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Extractive multi-document summarization using population-based multicriteria optimization]]></article-title>
<source><![CDATA[Expert Systems with Applications]]></source>
<year>2017</year>
<volume>86</volume>
<page-range>385-97</page-range></nlm-citation>
</ref>
<ref id="B8">
<label>8</label><nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Jones]]></surname>
<given-names><![CDATA[K. S.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Automatic summarising: The state of the art]]></article-title>
<source><![CDATA[Information Processing & Management]]></source>
<year>2007</year>
<volume>43</volume>
<numero>6</numero>
<issue>6</issue>
<page-range>1449-81</page-range></nlm-citation>
</ref>
<ref id="B9">
<label>9</label><nlm-citation citation-type="">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Kågebäck]]></surname>
<given-names><![CDATA[M.]]></given-names>
</name>
<name>
<surname><![CDATA[Mogren]]></surname>
<given-names><![CDATA[O.]]></given-names>
</name>
<name>
<surname><![CDATA[Tahmasebi]]></surname>
<given-names><![CDATA[N.]]></given-names>
</name>
<name>
<surname><![CDATA[Dubhashi]]></surname>
<given-names><![CDATA[D.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Extractive summarization using continuous vector space models]]></article-title>
<source><![CDATA[Proceedings of the 2nd Workshop on Continuous Vector Space Models and their Compositionality (CVSC)@ EACL]]></source>
<year>2014</year>
<page-range>31-9</page-range></nlm-citation>
</ref>
<ref id="B10">
<label>10</label><nlm-citation citation-type="">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Kobayashi]]></surname>
<given-names><![CDATA[H.]]></given-names>
</name>
<name>
<surname><![CDATA[Noguchi]]></surname>
<given-names><![CDATA[M.]]></given-names>
</name>
<name>
<surname><![CDATA[Yatsuka]]></surname>
<given-names><![CDATA[T.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Summarization based on embedding distributions]]></article-title>
<source><![CDATA[Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing]]></source>
<year>2015</year>
<page-range>1984-9</page-range></nlm-citation>
</ref>
<ref id="B11">
<label>11</label><nlm-citation citation-type="">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Lau]]></surname>
<given-names><![CDATA[J. H.]]></given-names>
</name>
<name>
<surname><![CDATA[Baldwin]]></surname>
<given-names><![CDATA[T.]]></given-names>
</name>
</person-group>
<source><![CDATA[An empirical evaluation of doc2vec with practical insights into document embedding generation]]></source>
<year>2016</year>
</nlm-citation>
</ref>
<ref id="B12">
<label>12</label><nlm-citation citation-type="">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Le]]></surname>
<given-names><![CDATA[Q.]]></given-names>
</name>
<name>
<surname><![CDATA[Mikolov]]></surname>
<given-names><![CDATA[T.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Distributed representations of sentences and documents]]></article-title>
<source><![CDATA[International Conference on Machine Learning]]></source>
<year>2014</year>
<page-range>1188-96</page-range></nlm-citation>
</ref>
<ref id="B13">
<label>13</label><nlm-citation citation-type="confpro">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Lin]]></surname>
<given-names><![CDATA[C. Y.]]></given-names>
</name>
</person-group>
<source><![CDATA[Looking for a few good metrics: Automatic summarization evaluation - how many samples are enough?]]></source>
<year>2004</year>
<conf-name><![CDATA[ Proceedings of the NTCIR Workshop 4]]></conf-name>
<conf-loc> </conf-loc>
</nlm-citation>
</ref>
<ref id="B14">
<label>14</label><nlm-citation citation-type="confpro">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Lin]]></surname>
<given-names><![CDATA[C.-Y.]]></given-names>
</name>
</person-group>
<source><![CDATA[Rouge: A package for automatic evaluation of summaries]]></source>
<year>2004</year>
<conf-name><![CDATA[ Text Summarization Branches Out]]></conf-name>
<conf-loc> </conf-loc>
</nlm-citation>
</ref>
<ref id="B15">
<label>15</label><nlm-citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Lin]]></surname>
<given-names><![CDATA[C.-Y.]]></given-names>
</name>
<name>
<surname><![CDATA[Hovy]]></surname>
<given-names><![CDATA[E.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Automatic evaluation of summaries using n-gram co-occurrence statistics]]></article-title>
<source><![CDATA[Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology-Volume 1]]></source>
<year>2003</year>
<page-range>71-8</page-range><publisher-name><![CDATA[Association for Computational Linguistics]]></publisher-name>
</nlm-citation>
</ref>
<ref id="B16">
<label>16</label><nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Luhn]]></surname>
<given-names><![CDATA[H. P.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[The automatic creation of literature abstracts]]></article-title>
<source><![CDATA[IBM Journal of research and development]]></source>
<year>1958</year>
<volume>2</volume>
<numero>2</numero>
<issue>2</issue>
<page-range>159-65</page-range></nlm-citation>
</ref>
<ref id="B17">
<label>17</label><nlm-citation citation-type="">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Mikolov]]></surname>
<given-names><![CDATA[T.]]></given-names>
</name>
<name>
<surname><![CDATA[Chen]]></surname>
<given-names><![CDATA[K.]]></given-names>
</name>
<name>
<surname><![CDATA[Corrado]]></surname>
<given-names><![CDATA[G.]]></given-names>
</name>
<name>
<surname><![CDATA[Dean]]></surname>
<given-names><![CDATA[J.]]></given-names>
</name>
</person-group>
<source><![CDATA[Efficient estimation of word representations in vector space]]></source>
<year>2013</year>
</nlm-citation>
</ref>
<ref id="B18">
<label>18</label><nlm-citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Mikolov]]></surname>
<given-names><![CDATA[T.]]></given-names>
</name>
<name>
<surname><![CDATA[Sutskever]]></surname>
<given-names><![CDATA[I.]]></given-names>
</name>
<name>
<surname><![CDATA[Chen]]></surname>
<given-names><![CDATA[K.]]></given-names>
</name>
<name>
<surname><![CDATA[Corrado]]></surname>
<given-names><![CDATA[G.]]></given-names>
</name>
<name>
<surname><![CDATA[Dean]]></surname>
<given-names><![CDATA[J.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Distributed representations of words and phrases and their compositionality]]></article-title>
<source><![CDATA[Proceedings of the 26th International Conference on Neural Information Processing Systems -Volume 2]]></source>
<year>2013</year>
<page-range>3111-9</page-range><publisher-loc><![CDATA[USA ]]></publisher-loc>
<publisher-name><![CDATA[Curran Associates Inc]]></publisher-name>
</nlm-citation>
</ref>
<ref id="B19">
<label>19</label><nlm-citation citation-type="">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Porter]]></surname>
<given-names><![CDATA[M. F.]]></given-names>
</name>
</person-group>
<source><![CDATA[Snowball: A language for stemming algorithms]]></source>
<year>2001</year>
</nlm-citation>
</ref>
<ref id="B20">
<label>20</label><nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Radev]]></surname>
<given-names><![CDATA[D. R.]]></given-names>
</name>
<name>
<surname><![CDATA[Jing]]></surname>
<given-names><![CDATA[H.]]></given-names>
</name>
<name>
<surname><![CDATA[Stys]]></surname>
<given-names><![CDATA[M.]]></given-names>
</name>
<name>
<surname><![CDATA[Tam]]></surname>
<given-names><![CDATA[D.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Centroid-based summarization of multiple documents]]></article-title>
<source><![CDATA[Information Processing & Management]]></source>
<year>2004</year>
<volume>40</volume>
<numero>6</numero>
<issue>6</issue>
<page-range>919-38</page-range></nlm-citation>
</ref>
<ref id="B21">
<label>21</label><nlm-citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Rehurek]]></surname>
<given-names><![CDATA[R.]]></given-names>
</name>
<name>
<surname><![CDATA[Sojka]]></surname>
<given-names><![CDATA[P.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Software framework for topic modelling with large corpora]]></article-title>
<source><![CDATA[Proceedings of the LREC 2010 Workshop on New Challenges for NLP Frameworks]]></source>
<year>2010</year>
<publisher-name><![CDATA[Citeseer]]></publisher-name>
</nlm-citation>
</ref>
<ref id="B22">
<label>22</label><nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Thompson]]></surname>
<given-names><![CDATA[K.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Programming techniques: Regular expression search algorithm]]></article-title>
<source><![CDATA[Communications of the ACM]]></source>
<year>1968</year>
<volume>11</volume>
<numero>6</numero>
<issue>6</issue>
<page-range>419-22</page-range></nlm-citation>
</ref>
<ref id="B23">
<label>23</label><nlm-citation citation-type="">
<person-group person-group-type="author">
<name>
<surname><![CDATA[van Halteren]]></surname>
<given-names><![CDATA[H.]]></given-names>
</name>
</person-group>
<source><![CDATA[Writing style recognition and sentence extraction]]></source>
<year>2002</year>
</nlm-citation>
</ref>
<ref id="B24">
<label>24</label><nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Wang]]></surname>
<given-names><![CDATA[D.]]></given-names>
</name>
<name>
<surname><![CDATA[Li]]></surname>
<given-names><![CDATA[T.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Weighted consensus multi-document summarization]]></article-title>
<source><![CDATA[Information Processing & Management]]></source>
<year>2012</year>
<volume>48</volume>
<numero>3</numero>
<issue>3</issue>
<page-range>513-23</page-range></nlm-citation>
</ref>
<ref id="B25">
<label>25</label><nlm-citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Wang]]></surname>
<given-names><![CDATA[D.]]></given-names>
</name>
<name>
<surname><![CDATA[Zhu]]></surname>
<given-names><![CDATA[S.]]></given-names>
</name>
<name>
<surname><![CDATA[Li]]></surname>
<given-names><![CDATA[T.]]></given-names>
</name>
<name>
<surname><![CDATA[Gong]]></surname>
<given-names><![CDATA[Y.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Multi-document summarization using sentence-based topic models]]></article-title>
<source><![CDATA[Proceedings of the ACL-IJCNLP 2009 Conference Short Papers]]></source>
<year>2009</year>
<page-range>297-300</page-range><publisher-name><![CDATA[Association for Computational Linguistics]]></publisher-name>
</nlm-citation>
</ref>
<ref id="B26">
<label>26</label><nlm-citation citation-type="">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Zipf]]></surname>
<given-names><![CDATA[G. K.]]></given-names>
</name>
</person-group>
<source><![CDATA[Human behaviour and the principle of least-effort]]></source>
<year>1949</year>
<publisher-loc><![CDATA[Cambridge, MA]]></publisher-loc>
</nlm-citation>
</ref>
</ref-list>
</back>
</article>
