<?xml version="1.0" encoding="ISO-8859-1"?><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<front>
<journal-meta>
<journal-id>1405-5546</journal-id>
<journal-title><![CDATA[Computación y Sistemas]]></journal-title>
<abbrev-journal-title><![CDATA[Comp. y Sist.]]></abbrev-journal-title>
<issn>1405-5546</issn>
<publisher>
<publisher-name><![CDATA[Instituto Politécnico Nacional, Centro de Investigación en Computación]]></publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id>S1405-55462016000300541</article-id>
<article-id pub-id-type="doi">10.13053/cys-20-3-2451</article-id>
<title-group>
<article-title xml:lang="en"><![CDATA[Exploiting Bishun to Predict the Pronunciation of Chinese]]></article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname><![CDATA[Mi]]></surname>
<given-names><![CDATA[Chenggang]]></given-names>
</name>
<xref ref-type="aff" rid="Af1"/>
<xref ref-type="aff" rid="Af2"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname><![CDATA[Yang]]></surname>
<given-names><![CDATA[Yating]]></given-names>
</name>
<xref ref-type="aff" rid="Af1"/>
<xref ref-type="aff" rid="Af2"/>
<xref ref-type="aff" rid="Af3"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname><![CDATA[Zhou]]></surname>
<given-names><![CDATA[Xi]]></given-names>
</name>
<xref ref-type="aff" rid="Af1"/>
<xref ref-type="aff" rid="Af2"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname><![CDATA[Wang]]></surname>
<given-names><![CDATA[Lei]]></given-names>
</name>
<xref ref-type="aff" rid="Af1"/>
<xref ref-type="aff" rid="Af2"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname><![CDATA[Li]]></surname>
<given-names><![CDATA[Xiao]]></given-names>
</name>
<xref ref-type="aff" rid="Af1"/>
<xref ref-type="aff" rid="Af2"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname><![CDATA[Jiang]]></surname>
<given-names><![CDATA[Tonghai]]></given-names>
</name>
<xref ref-type="aff" rid="Af1"/>
<xref ref-type="aff" rid="Af2"/>
</contrib>
</contrib-group>
<aff id="Af1">
<institution><![CDATA[Chinese Academy of Science Xinjiang Technical Institute of Physics & Chemistry]]></institution>
<addr-line><![CDATA[ ]]></addr-line>
<country>China</country>
</aff>
<aff id="Af2">
<institution><![CDATA[Xinjiang Key Laboratory of Minority Speech and Language Information Processing]]></institution>
<addr-line><![CDATA[ ]]></addr-line>
<country>China</country>
</aff>
<aff id="Af3">
<institution><![CDATA[Institute of Acoustics, Chinese Academy of Sciences]]></institution>
<addr-line><![CDATA[ ]]></addr-line>
<country>China</country>
</aff>
<pub-date pub-type="pub">
<day>00</day>
<month>09</month>
<year>2016</year>
</pub-date>
<pub-date pub-type="epub">
<day>00</day>
<month>09</month>
<year>2016</year>
</pub-date>
<volume>20</volume>
<numero>3</numero>
<fpage>541</fpage>
<lpage>549</lpage>
<copyright-statement/>
<copyright-year/>
<self-uri xlink:href="http://www.scielo.org.mx/scielo.php?script=sci_arttext&amp;pid=S1405-55462016000300541&amp;lng=en&amp;nrm=iso"></self-uri><self-uri xlink:href="http://www.scielo.org.mx/scielo.php?script=sci_abstract&amp;pid=S1405-55462016000300541&amp;lng=en&amp;nrm=iso"></self-uri><self-uri xlink:href="http://www.scielo.org.mx/scielo.php?script=sci_pdf&amp;pid=S1405-55462016000300541&amp;lng=en&amp;nrm=iso"></self-uri><abstract abstract-type="short" xml:lang="en"><p><![CDATA[Learning to pronounce Chinese characters is usually considered as a very hard part to foreigners to study Chinese. At beginning, Chinese learners must bear in mind thousands of Chinese characters, including their pronunciation, meanings, Bishun (order of strokes) etc., which is very time consuming and boring. In this paper, we proposed a novel method based on translation model to predict the Chinese character pronunciation automatically. We first convert each Chinese character into Bishun, then, we train the pronunciation prediction model (translation model) according to Bishun and their correspondence Pinyin sequences. To make our model practically, we also introduced some error tolerant strategies. Experimental results show that our method can predict the pronunciation of Chinese characters effectively.]]></p></abstract>
<kwd-group>
<kwd lng="en"><![CDATA[Pronunciation prediction]]></kwd>
<kwd lng="en"><![CDATA[Bishun]]></kwd>
<kwd lng="en"><![CDATA[language model]]></kwd>
<kwd lng="en"><![CDATA[translation model]]></kwd>
<kwd lng="en"><![CDATA[error tolerant]]></kwd>
</kwd-group>
</article-meta>
</front><back>
<ref-list>
<ref id="B1">
<nlm-citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Hsieh]]></surname>
<given-names><![CDATA[S.-K.]]></given-names>
</name>
</person-group>
<source><![CDATA[Concept and Computation: A preliminary survey of Chinese Characters as a Knowledge Resource in NLP]]></source>
<year>2006</year>
<publisher-name><![CDATA[Universität Tübingen]]></publisher-name>
</nlm-citation>
</ref>
<ref id="B2">
<nlm-citation citation-type="confpro">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Byrd]]></surname>
<given-names><![CDATA[R.J.]]></given-names>
</name>
<name>
<surname><![CDATA[Tzoukermann]]></surname>
<given-names><![CDATA[E.]]></given-names>
</name>
</person-group>
<source><![CDATA[Adapting an English morphological analyzer for French]]></source>
<year>1988</year>
<conf-name><![CDATA[ 26th annual meeting on Association for Computational Linguistics]]></conf-name>
<conf-loc> </conf-loc>
<page-range>1-6</page-range></nlm-citation>
</ref>
<ref id="B3">
<nlm-citation citation-type="confpro">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Koehn]]></surname>
<given-names><![CDATA[P.]]></given-names>
</name>
<name>
<surname><![CDATA[Och]]></surname>
<given-names><![CDATA[F.J.]]></given-names>
</name>
<name>
<surname><![CDATA[Marcu]]></surname>
<given-names><![CDATA[D.]]></given-names>
</name>
</person-group>
<source><![CDATA[Statistical phrase-based translation]]></source>
<year>2003</year>
<conf-name><![CDATA[ Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology]]></conf-name>
<conf-date>2003</conf-date>
<conf-loc> </conf-loc>
<page-range>48-54</page-range></nlm-citation>
</ref>
<ref id="B4">
<nlm-citation citation-type="book">
<article-title xml:lang=""><![CDATA[Phrase-based statistical machine translation]]></article-title>
<person-group person-group-type="author">
<name>
<surname><![CDATA[Zens]]></surname>
<given-names><![CDATA[R.]]></given-names>
</name>
<name>
<surname><![CDATA[Och]]></surname>
<given-names><![CDATA[F.J.]]></given-names>
</name>
<name>
<surname><![CDATA[Ney]]></surname>
<given-names><![CDATA[H.]]></given-names>
</name>
</person-group>
<source><![CDATA[Advances in Artificial Intelligence]]></source>
<year>2002</year>
<page-range>18-32</page-range><publisher-loc><![CDATA[Berlin Heidelberg ]]></publisher-loc>
<publisher-name><![CDATA[Springer]]></publisher-name>
</nlm-citation>
</ref>
<ref id="B5">
<nlm-citation citation-type="journal">
<article-title xml:lang=""><![CDATA[The alignment template approach to statistical machine translation]]></article-title>
<person-group person-group-type="author">
<name>
<surname><![CDATA[Och]]></surname>
<given-names><![CDATA[F.J.]]></given-names>
</name>
<name>
<surname><![CDATA[Ney]]></surname>
<given-names><![CDATA[H.]]></given-names>
</name>
</person-group>
<source><![CDATA[Computational linguistics]]></source>
<year>2004</year>
<volume>30</volume>
<numero>4</numero>
<issue>4</issue>
<page-range>417-49</page-range></nlm-citation>
</ref>
<ref id="B6">
<nlm-citation citation-type="">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Shi]]></surname>
<given-names><![CDATA[X.]]></given-names>
</name>
<name>
<surname><![CDATA[Knight]]></surname>
<given-names><![CDATA[K.]]></given-names>
</name>
<name>
<surname><![CDATA[Ji]]></surname>
<given-names><![CDATA[H.]]></given-names>
</name>
</person-group>
<source><![CDATA[How to Speak a Language without Knowing It]]></source>
<year>2014</year>
</nlm-citation>
</ref>
<ref id="B7">
<nlm-citation citation-type="journal">
<article-title xml:lang=""><![CDATA[A Generative Data Augmentation Model for Enhancing Chinese Dialect Pronunciation Prediction]]></article-title>
<person-group person-group-type="author">
<name>
<surname><![CDATA[Lin]]></surname>
<given-names><![CDATA[C.-C.]]></given-names>
</name>
<name>
<surname><![CDATA[Tsai]]></surname>
<given-names><![CDATA[R.T.H.]]></given-names>
</name>
</person-group>
<source><![CDATA[Audio, Speech, and Language Processing, IEEE Transactions on]]></source>
<year>2012</year>
<volume>20</volume>
<numero>4</numero>
<issue>4</issue>
<page-range>1109-17</page-range></nlm-citation>
</ref>
<ref id="B8">
<nlm-citation citation-type="journal">
<article-title xml:lang=""><![CDATA[The mathematics of statistical machine translation: Parameter estimation]]></article-title>
<person-group person-group-type="author">
<name>
<surname><![CDATA[Brown]]></surname>
<given-names><![CDATA[P.F.]]></given-names>
</name>
<name>
<surname><![CDATA[Pietra]]></surname>
<given-names><![CDATA[V.J.D.]]></given-names>
</name>
<name>
<surname><![CDATA[Pietra]]></surname>
<given-names><![CDATA[S.A.D.]]></given-names>
</name>
<name>
<surname><![CDATA[Mercer]]></surname>
<given-names><![CDATA[R.L.]]></given-names>
</name>
</person-group>
<source><![CDATA[Computational linguistics]]></source>
<year>1993</year>
<volume>19</volume>
<numero>2</numero>
<issue>2</issue>
<page-range>263-311</page-range></nlm-citation>
</ref>
<ref id="B9">
<nlm-citation citation-type="book">
<article-title xml:lang=""><![CDATA[Parameter Learning for Statistical Machine Translation Using CMA-ES]]></article-title>
<person-group person-group-type="author">
<name>
<surname><![CDATA[Tran]]></surname>
<given-names><![CDATA[V.H.]]></given-names>
</name>
<name>
<surname><![CDATA[Pham]]></surname>
<given-names><![CDATA[A.T.]]></given-names>
</name>
<name>
<surname><![CDATA[Nguyen]]></surname>
<given-names><![CDATA[V.V.]]></given-names>
</name>
<name>
<surname><![CDATA[Nguyen]]></surname>
<given-names><![CDATA[H.X.]]></given-names>
</name>
<name>
<surname><![CDATA[Nguyen]]></surname>
<given-names><![CDATA[H.Q.]]></given-names>
</name>
</person-group>
<source><![CDATA[Knowledge and Systems Engineering]]></source>
<year>2015</year>
<page-range>425-32</page-range><publisher-name><![CDATA[Springer International Publishing]]></publisher-name>
</nlm-citation>
</ref>
<ref id="B10">
<nlm-citation citation-type="journal">
<article-title xml:lang=""><![CDATA[A neural probabilistic language model]]></article-title>
<person-group person-group-type="author">
<name>
<surname><![CDATA[Bengio]]></surname>
<given-names><![CDATA[Y.]]></given-names>
</name>
<name>
<surname><![CDATA[Ducharme]]></surname>
<given-names><![CDATA[R.]]></given-names>
</name>
<name>
<surname><![CDATA[Vincent]]></surname>
<given-names><![CDATA[P.]]></given-names>
</name>
<name>
<surname><![CDATA[Jauvin]]></surname>
<given-names><![CDATA[C.]]></given-names>
</name>
</person-group>
<source><![CDATA[The Journal of Machine Learning Research]]></source>
<year>2003</year>
<page-range>1137-55</page-range></nlm-citation>
</ref>
<ref id="B11">
<nlm-citation citation-type="journal">
<article-title xml:lang=""><![CDATA[Continuous space language models]]></article-title>
<person-group person-group-type="author">
<name>
<surname><![CDATA[Schwenk]]></surname>
<given-names><![CDATA[H.]]></given-names>
</name>
</person-group>
<source><![CDATA[Computer Speech & Language]]></source>
<year>2007</year>
<volume>21</volume>
<numero>3</numero>
<issue>3</issue>
<page-range>492-518</page-range></nlm-citation>
</ref>
<ref id="B12">
<nlm-citation citation-type="journal">
<article-title xml:lang=""><![CDATA[Class-based n-gram models of natural language]]></article-title>
<person-group person-group-type="author">
<name>
<surname><![CDATA[Brown]]></surname>
<given-names><![CDATA[P.F.]]></given-names>
</name>
<name>
<surname><![CDATA[deSouza]]></surname>
<given-names><![CDATA[P.V.]]></given-names>
</name>
<name>
<surname><![CDATA[Mercer]]></surname>
<given-names><![CDATA[R.L.]]></given-names>
</name>
<name>
<surname><![CDATA[Pietra]]></surname>
<given-names><![CDATA[V.J.]]></given-names>
</name>
<name>
<surname><![CDATA[Lai]]></surname>
<given-names><![CDATA[J.C.]]></given-names>
</name>
</person-group>
<source><![CDATA[Computational linguistics]]></source>
<year>1992</year>
<volume>18</volume>
<numero>4</numero>
<issue>4</issue>
<page-range>467-79</page-range></nlm-citation>
</ref>
<ref id="B13">
<nlm-citation citation-type="confpro">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Buck]]></surname>
<given-names><![CDATA[C.]]></given-names>
</name>
<name>
<surname><![CDATA[Heafield]]></surname>
<given-names><![CDATA[K.]]></given-names>
</name>
<name>
<surname><![CDATA[van Ooyen]]></surname>
<given-names><![CDATA[B.]]></given-names>
</name>
</person-group>
<source><![CDATA[N-gram counts and language models from the common crawl]]></source>
<year>2014</year>
<conf-name><![CDATA[ Language Resources and Evaluation Conference]]></conf-name>
<conf-loc> </conf-loc>
</nlm-citation>
</ref>
<ref id="B14">
<nlm-citation citation-type="">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Singh]]></surname>
<given-names><![CDATA[A.]]></given-names>
</name>
</person-group>
<source><![CDATA[The EM Algorithm]]></source>
<year>2005</year>
</nlm-citation>
</ref>
<ref id="B15">
<nlm-citation citation-type="journal">
<article-title xml:lang=""><![CDATA[What is the expectation maximization algorithm?]]></article-title>
<person-group person-group-type="author">
<name>
<surname><![CDATA[Do]]></surname>
<given-names><![CDATA[C.B.]]></given-names>
</name>
<name>
<surname><![CDATA[Batzoglou]]></surname>
<given-names><![CDATA[S.]]></given-names>
</name>
</person-group>
<source><![CDATA[Nature biotechnology]]></source>
<year>2008</year>
<volume>26</volume>
<numero>8</numero>
<issue>8</issue>
<page-range>897-900</page-range></nlm-citation>
</ref>
<ref id="B16">
<nlm-citation citation-type="journal">
<article-title xml:lang=""><![CDATA[Maximum likelihood from incomplete data via the EM algorithm]]></article-title>
<person-group person-group-type="author">
<name>
<surname><![CDATA[Dempster]]></surname>
<given-names><![CDATA[A.P.]]></given-names>
</name>
<name>
<surname><![CDATA[Laird]]></surname>
<given-names><![CDATA[N.M.]]></given-names>
</name>
<name>
<surname><![CDATA[Rubin]]></surname>
<given-names><![CDATA[D.B.]]></given-names>
</name>
</person-group>
<source><![CDATA[Journal of the royal statistical society. Series B (methodological)]]></source>
<year>1977</year>
<page-range>1-38</page-range></nlm-citation>
</ref>
<ref id="B17">
<nlm-citation citation-type="confpro">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Liu]]></surname>
<given-names><![CDATA[Y.]]></given-names>
</name>
<name>
<surname><![CDATA[Xia]]></surname>
<given-names><![CDATA[T.]]></given-names>
</name>
<name>
<surname><![CDATA[Xiao]]></surname>
<given-names><![CDATA[X.]]></given-names>
</name>
<name>
<surname><![CDATA[Liu]]></surname>
<given-names><![CDATA[Q.]]></given-names>
</name>
</person-group>
<source><![CDATA[Weighted alignment matrices for statistical machine translation]]></source>
<year>2009</year>
<conf-name><![CDATA[ Conference on Empirical Methods in Natural Language Processing]]></conf-name>
<conf-date>2009</conf-date>
<conf-loc> </conf-loc>
<page-range>1017-26</page-range></nlm-citation>
</ref>
<ref id="B18">
<nlm-citation citation-type="confpro">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Niehues]]></surname>
<given-names><![CDATA[J.]]></given-names>
</name>
<name>
<surname><![CDATA[Vogel]]></surname>
<given-names><![CDATA[S.]]></given-names>
</name>
</person-group>
<source><![CDATA[Discriminative word alignment via alignment matrix modeling]]></source>
<year>2008</year>
<conf-name><![CDATA[ Third Workshop on Statistical Machine Translation]]></conf-name>
<conf-loc> </conf-loc>
<page-range>18-25</page-range></nlm-citation>
</ref>
<ref id="B19">
<nlm-citation citation-type="confpro">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Koehn]]></surname>
<given-names><![CDATA[P.]]></given-names>
</name>
<name>
<surname><![CDATA[Hoang]]></surname>
<given-names><![CDATA[H.]]></given-names>
</name>
<name>
<surname><![CDATA[Birch]]></surname>
<given-names><![CDATA[A.]]></given-names>
</name>
<name>
<surname><![CDATA[Callison-Burch]]></surname>
<given-names><![CDATA[C.]]></given-names>
</name>
<name>
<surname><![CDATA[Federico]]></surname>
<given-names><![CDATA[M.]]></given-names>
</name>
<name>
<surname><![CDATA[Bertoldi]]></surname>
<given-names><![CDATA[N.]]></given-names>
</name>
<name>
<surname><![CDATA[Cowan]]></surname>
<given-names><![CDATA[B.]]></given-names>
</name>
<name>
<surname><![CDATA[Shen]]></surname>
<given-names><![CDATA[W.]]></given-names>
</name>
<name>
<surname><![CDATA[Moran]]></surname>
<given-names><![CDATA[C.]]></given-names>
</name>
<name>
<surname><![CDATA[Zens]]></surname>
<given-names><![CDATA[R.]]></given-names>
</name>
<name>
<surname><![CDATA[Dyer]]></surname>
<given-names><![CDATA[C.]]></given-names>
</name>
<name>
<surname><![CDATA[Bojar]]></surname>
<given-names><![CDATA[O.]]></given-names>
</name>
<name>
<surname><![CDATA[Constantin]]></surname>
<given-names><![CDATA[A.]]></given-names>
</name>
<name>
<surname><![CDATA[Herbst]]></surname>
<given-names><![CDATA[E.]]></given-names>
</name>
</person-group>
<source><![CDATA[Moses: Open source toolkit for statistical machine translation]]></source>
<year>2007</year>
<conf-name><![CDATA[ 45th annual meeting of the ACL on interactive poster and demonstration sessions]]></conf-name>
<conf-loc> </conf-loc>
<page-range>177-80</page-range></nlm-citation>
</ref>
<ref id="B20">
<nlm-citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname><![CDATA[Stolcke]]></surname>
<given-names><![CDATA[A.]]></given-names>
</name>
</person-group>
<source><![CDATA[SRILM-an extensible language modeling toolkit]]></source>
<year>2002</year>
<publisher-name><![CDATA[INTERSPEECH]]></publisher-name>
</nlm-citation>
</ref>
</ref-list>
</back>
</article>
