BIS 2008

BIS record '2008/1/484'

% Conference paper from the BIS 2008 proceedings. Volume-level data
% (booktitle, editor, publisher, isbn) is inherited through `crossref` from
% the parent entry below; classic BibTeX requires that parent to appear
% later in the file than every entry that refers to it.
% NOTE(review): the entry type and the key BIS:2008/1/484 were reconstructed
% from the record id above and the key pattern used in `crossref` -- confirm
% against the BIS index before citing.
% `session` and `bibsource` are not standard fields; styles ignore them.
% Percent signs in the abstract are escaped so that a style which prints the
% abstract does not turn the rest of the line into a TeX comment.
@inproceedings{BIS:2008/1/484,
  author        = {Zhang, Qi and
                   Hu, Guoping and
                   Yue, Lihua},
  title         = {{Chinese} Organization Entity Recognition and Association on Web Pages},
  year          = {2008},
  session       = {Information Retrieval},
  pages         = {12--23},
  crossref      = {BIS:2008/1},
  bibsource     = {BIS,},
  abstract      = {In this paper, we consider the problem of automatic Chinese Named Entity Recognition (NER) on web pages and try to extract the association between recognized entities. Usually NER approaches mainly focus on plain text and get poor results on the Web pages of Internet. In this paper, we first explore the difference of plain texts and web pages for NER. Based on characteristic of HTML structure, we propose a set of unified methods to recognize and associate entities on web pages. In our experiments, the F-measure of organization name recognition is 73.6\%, where 14.3\% improvement is achieved beyond the baseline system. The F-measure of organization name-address association on page level is 77.5\%, and the performance achieves 89.5\% on corpus level, which indicates that our approach is quite effective and practical.},
}

% Parent proceedings volume; its key is the value of `crossref` above.
% `booktitle` duplicates `title` on purpose: classic BibTeX copies fields to
% the child entry by name, so the child only gets a booktitle if one exists here.
% Editors are separated with " and " so that they parse as two people.
@proceedings{BIS:2008/1,
  editor        = {Abramowicz, Witold and Fensel, Dieter},
  booktitle     = {Business Information Systems, 11th International Conference, {BIS} 2008, {Innsbruck}, {Austria}, {May} 2008},
  title         = {Business Information Systems, 11th International Conference, {BIS} 2008, {Innsbruck}, {Austria}, {May} 2008},
  publisher     = {Springer-Verlag},
  isbn          = {978-3-540-79395-3},
  year          = {2008},
  bibsource     = {BIS,},
}

BIS index BIS 2008
Dept. of Information Systems at Poznan University of Economics, Dominik Flejter