<?xml version="1.0" encoding="ISO-8859-1"?><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<front>
<journal-meta>
<journal-id>1405-5546</journal-id>
<journal-title><![CDATA[Computación y Sistemas]]></journal-title>
<abbrev-journal-title><![CDATA[Comp. y Sist.]]></abbrev-journal-title>
<issn>1405-5546</issn>
<publisher>
<publisher-name><![CDATA[Instituto Politécnico Nacional, Centro de Investigación en Computación]]></publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id>S1405-55462024000402139</article-id>
<article-id pub-id-type="doi">10.13053/cys-28-4-5302</article-id>
<title-group>
<article-title xml:lang="en"><![CDATA[Optimal Clustering of Central Bank Role Profile Descriptions]]></article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname><![CDATA[Wade]]></surname>
<given-names><![CDATA[Aidan]]></given-names>
</name>
<xref ref-type="aff" rid="Af1"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname><![CDATA[Hofmann]]></surname>
<given-names><![CDATA[Markus]]></given-names>
</name>
<xref ref-type="aff" rid="Af2"/>
</contrib>
</contrib-group>
<aff id="Af1">
<institution><![CDATA[Central Bank of Ireland]]></institution>
<addr-line><![CDATA[ ]]></addr-line>
<country>Ireland</country>
</aff>
<aff id="Af2">
<institution><![CDATA[TU Dublin Department of Informatics]]></institution>
<addr-line><![CDATA[ ]]></addr-line>
<country>Ireland</country>
</aff>
<pub-date pub-type="pub">
<day>00</day>
<month>12</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="epub">
<day>00</day>
<month>12</month>
<year>2024</year>
</pub-date>
<volume>28</volume>
<numero>4</numero>
<fpage>2139</fpage>
<lpage>2152</lpage>
<copyright-statement/>
<copyright-year/>
<self-uri xlink:href="http://www.scielo.org.mx/scielo.php?script=sci_arttext&amp;pid=S1405-55462024000402139&amp;lng=en&amp;nrm=iso"></self-uri><self-uri xlink:href="http://www.scielo.org.mx/scielo.php?script=sci_abstract&amp;pid=S1405-55462024000402139&amp;lng=en&amp;nrm=iso"></self-uri><self-uri xlink:href="http://www.scielo.org.mx/scielo.php?script=sci_pdf&amp;pid=S1405-55462024000402139&amp;lng=en&amp;nrm=iso"></self-uri><abstract abstract-type="short" xml:lang="en"><p><![CDATA[Abstract: The Central Bank of Ireland has a set of role profiles used when recruiting new staff but which also contain information about the current skill levels in the bank and which could support project planning. The roles are manually created according to a semi-structured template and the volume of roles makes them increasingly hard to manage, requiring an NLP solution for finding similar roles and applying an appropriate grouping. Different pre-processing and dimension reduction methods are tested using K-Means and Agglomerative Clustering (HAC) with clustering metrics Davies-Bouldin and Silhouette. This suggests an optimal number of clusters in the range 70 to 130 but the correct value is subjective and requires subject matter expertise.]]></p></abstract>
<kwd-group>
<kwd lng="en"><![CDATA[NLP]]></kwd>
<kwd lng="en"><![CDATA[clustering]]></kwd>
<kwd lng="en"><![CDATA[K-means]]></kwd>
<kwd lng="en"><![CDATA[agglomerative clustering]]></kwd>
<kwd lng="en"><![CDATA[role descriptions]]></kwd>
</kwd-group>
</article-meta>
</front><back>
<ref-list>
<ref id="B1">
<nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Bafna]]></surname>
<given-names><![CDATA[P.]]></given-names>
</name>
<name>
<surname><![CDATA[Shirwaikar]]></surname>
<given-names><![CDATA[S.]]></given-names>
</name>
<name>
<surname><![CDATA[Pramod]]></surname>
<given-names><![CDATA[D.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Task recommender system using semantic clustering to identify the right personnel]]></article-title>
<source><![CDATA[VINE Journal of Information and Knowledge Management Systems]]></source>
<year>2019</year>
<volume>49</volume>
<numero>2</numero>
<issue>2</issue>
<page-range>181-99</page-range></nlm-citation>
</ref>
<ref id="B2">
<nlm-citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Bird]]></surname>
<given-names><![CDATA[S.]]></given-names>
</name>
<name>
<surname><![CDATA[Klein]]></surname>
<given-names><![CDATA[E.]]></given-names>
</name>
<name>
<surname><![CDATA[Loper]]></surname>
<given-names><![CDATA[E.]]></given-names>
</name>
</person-group>
<source><![CDATA[Natural language processing with Python: Analyzing text with the natural language toolkit]]></source>
<year>2009</year>
<publisher-name>O&#8217;Reilly</publisher-name>
</nlm-citation>
</ref>
<ref id="B3">
<nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Debao]]></surname>
<given-names><![CDATA[D.]]></given-names>
</name>
<name>
<surname><![CDATA[Yinxia]]></surname>
<given-names><![CDATA[M.]]></given-names>
</name>
<name>
<surname><![CDATA[Min]]></surname>
<given-names><![CDATA[Z.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Analysis of big data job requirements based on k-means text clustering in China]]></article-title>
<source><![CDATA[PLOS ONE]]></source>
<year>2021</year>
<volume>16</volume>
<numero>8</numero>
<issue>8</issue>
<page-range>1-14</page-range></nlm-citation>
</ref>
<ref id="B4">
<nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Hickman]]></surname>
<given-names><![CDATA[L.]]></given-names>
</name>
<name>
<surname><![CDATA[Thapa]]></surname>
<given-names><![CDATA[S.]]></given-names>
</name>
<name>
<surname><![CDATA[Tay]]></surname>
<given-names><![CDATA[L.]]></given-names>
</name>
<name>
<surname><![CDATA[Cao]]></surname>
<given-names><![CDATA[M.]]></given-names>
</name>
<name>
<surname><![CDATA[Srinivasan]]></surname>
<given-names><![CDATA[P.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Text preprocessing for text mining in organizational research: Review and recommendations]]></article-title>
<source><![CDATA[Organizational Research Methods]]></source>
<year>2022</year>
<volume>25</volume>
<numero>1</numero>
<issue>1</issue>
<page-range>114-46</page-range></nlm-citation>
</ref>
<ref id="B5">
<nlm-citation citation-type="confpro">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Jacksi]]></surname>
<given-names><![CDATA[K.]]></given-names>
</name>
<name>
<surname><![CDATA[Ibrahim]]></surname>
<given-names><![CDATA[R. K.]]></given-names>
</name>
<name>
<surname><![CDATA[Zeebaree]]></surname>
<given-names><![CDATA[S. R. M.]]></given-names>
</name>
<name>
<surname><![CDATA[Zebari]]></surname>
<given-names><![CDATA[R. R.]]></given-names>
</name>
<name>
<surname><![CDATA[Sadeeq]]></surname>
<given-names><![CDATA[M. A. M.]]></given-names>
</name>
</person-group>
<source><![CDATA[Clustering documents based on semantic similarity using HAC and k-Mean algorithms]]></source>
<year>2020</year>
<conf-name><![CDATA[2020 International Conference on Advanced Science and Engineering]]></conf-name>
<conf-loc> </conf-loc>
<page-range>205-10</page-range></nlm-citation>
</ref>
<ref id="B6">
<nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Kwale]]></surname>
<given-names><![CDATA[F. M.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[A critical review of k means text clustering algorithms]]></article-title>
<source><![CDATA[International Journal of Advanced Research in Computer Science]]></source>
<year>2013</year>
<volume>4</volume>
<numero>9</numero>
<issue>9</issue>
<page-range>27-34</page-range></nlm-citation>
</ref>
<ref id="B7">
<nlm-citation citation-type="confpro">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Liu]]></surname>
<given-names><![CDATA[F.]]></given-names>
</name>
<name>
<surname><![CDATA[Xiong]]></surname>
<given-names><![CDATA[L.]]></given-names>
</name>
</person-group>
<source><![CDATA[Survey on text clustering algorithm - research present situation of text clustering algorithm]]></source>
<year>2011</year>
<conf-name><![CDATA[2nd International Conference on Software Engineering and Service Science]]></conf-name>
<conf-date>2011</conf-date>
<conf-loc> </conf-loc>
<page-range>196-9</page-range></nlm-citation>
</ref>
<ref id="B8">
<nlm-citation citation-type="confpro">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Megasari]]></surname>
<given-names><![CDATA[R.]]></given-names>
</name>
<name>
<surname><![CDATA[Piantari]]></surname>
<given-names><![CDATA[E.]]></given-names>
</name>
<name>
<surname><![CDATA[Nugraha]]></surname>
<given-names><![CDATA[R.]]></given-names>
</name>
</person-group>
<source><![CDATA[Graduates profile mapping based on job vacancy information clustering]]></source>
<year>2020</year>
<conf-name><![CDATA[6th International Conference on Science in Information Technology]]></conf-name>
<conf-date>2020</conf-date>
<conf-loc> </conf-loc>
<page-range>35-9</page-range></nlm-citation>
</ref>
<ref id="B9">
<nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Pedregosa]]></surname>
<given-names><![CDATA[F.]]></given-names>
</name>
<name>
<surname><![CDATA[Varoquaux]]></surname>
<given-names><![CDATA[G.]]></given-names>
</name>
<name>
<surname><![CDATA[Gramfort]]></surname>
<given-names><![CDATA[A.]]></given-names>
</name>
<name>
<surname><![CDATA[Michel]]></surname>
<given-names><![CDATA[V.]]></given-names>
</name>
<name>
<surname><![CDATA[Thirion]]></surname>
<given-names><![CDATA[B.]]></given-names>
</name>
<name>
<surname><![CDATA[Grisel]]></surname>
<given-names><![CDATA[O.]]></given-names>
</name>
<name>
<surname><![CDATA[Blondel]]></surname>
<given-names><![CDATA[M.]]></given-names>
</name>
<name>
<surname><![CDATA[Prettenhofer]]></surname>
<given-names><![CDATA[P.]]></given-names>
</name>
<name>
<surname><![CDATA[Weiss]]></surname>
<given-names><![CDATA[R.]]></given-names>
</name>
<name>
<surname><![CDATA[Dubourg]]></surname>
<given-names><![CDATA[V.]]></given-names>
</name>
<name>
<surname><![CDATA[Vanderplas]]></surname>
<given-names><![CDATA[J.]]></given-names>
</name>
<name>
<surname><![CDATA[Passos]]></surname>
<given-names><![CDATA[A.]]></given-names>
</name>
<name>
<surname><![CDATA[Cournapeau]]></surname>
<given-names><![CDATA[D.]]></given-names>
</name>
<name>
<surname><![CDATA[Brucher]]></surname>
<given-names><![CDATA[M.]]></given-names>
</name>
<name>
<surname><![CDATA[Perrot]]></surname>
<given-names><![CDATA[M.]]></given-names>
</name>
<name>
<surname><![CDATA[Duchesnay]]></surname>
<given-names><![CDATA[E.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Scikit-learn: Machine learning in Python]]></article-title>
<source><![CDATA[Journal of Machine Learning Research]]></source>
<year>2011</year>
<volume>12</volume>
<page-range>2825-30</page-range></nlm-citation>
</ref>
<ref id="B10">
<nlm-citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Siswipraptini]]></surname>
<given-names><![CDATA[P. C.]]></given-names>
</name>
<name>
<surname><![CDATA[Warnars]]></surname>
<given-names><![CDATA[H. L. H. S.]]></given-names>
</name>
<name>
<surname><![CDATA[Ramadhan]]></surname>
<given-names><![CDATA[A.]]></given-names>
</name>
<name>
<surname><![CDATA[Budiharto]]></surname>
<given-names><![CDATA[W.]]></given-names>
</name>
</person-group>
<article-title xml:lang=""><![CDATA[Information technology job profile using average-linkage hierarchical clustering analysis]]></article-title>
<source><![CDATA[IEEE Access]]></source>
<year>2023</year>
<volume>11</volume>
<page-range>94647-63</page-range></nlm-citation>
</ref>
</ref-list>
</back>
</article>
