BIS record '2002/1/189'
BibTeX
% Invited-speaker paper from BIS 2002. Proceedings-level fields (booktitle,
% publisher, isbn) are inherited through crossref from the parent entry
% BIS:2002/1, which must stay below this entry for classic BibTeX.
% "session" and "bibsource" are non-standard fields that styles ignore.
@inproceedings{BIS:2002/1/189,
  author    = {Fuhr, Norbert},
  title     = {Information Retrieval Methods for {XML} Documents},
  year      = {2002},
  session   = {Invited Speakers},
  pages     = {21--25},
  crossref  = {BIS:2002/1},
  bibsource = {BIS, http://bis.kie.ue.poznan.pl/biblio/},
  abstract  = {XML is going to be established as standard document format, especially for Web-based applications. The major purpose of XML markup is the explicit representation of the logical structure of a document. Given this markup, different kinds of operations referring to the logical structure can be performed on XML documents:
- Multiple views on a document can be generated (e.g. for different audiences), specific elements of an XML document can be extracted, or documents fulfilling specific structural conditions can be retrieved from a document base. Overall, if information is represented in XML format, exchange of this information between different software systems (especially on the Web) is simplified, thus supporting interoperability. Looking at the broad variety of XML applications and systems that are currently under development, one can see that there are in fact two different views on XML: The document-centric view focuses on structured documents in the traditional sense (based on concepts from electronic publishing, especially SGML). Here XML is used for logical markup of texts both at the macro level (e.g. chapter, section, paragraph) and the micro level (e.g. MathML for mathematical formulas, CML for chemical formulas). XML DTDs, namespaces, XPath and XSL are W3C standards based on this view.
- The data-centric view uses XML for exchanging formatted data in a generic, serialized form between different applications (e.g. spreadsheets, database records). This is especially important for the interoperability of Web services (e.g. e-business applications). XML schema and the proposed XML query language XQuery address the data-centric issues of XML. Information retrieval (IR) methods mainly refer to the document-centric view of XML (although the essential concepts of uncertainty and vagueness also may be applicable for certain data-centric applications, e.g. for retrieval of metadata records). As the core of IR methods, appropriate retrieval methods must be available. In the following, we first present XIRQL, an XML query language based on IR concepts. Then we describe the CLASSIX project, which aims at developing various IR methods for XML documents. Finally, we give a brief survey over related work.
}
}
% Parent proceedings entry referenced via crossref by the BIS 2002 papers.
% Classic BibTeX requires it to appear after every entry that crossrefs it.
% "booktitle" repeats "title" on purpose: classic BibTeX copies fields by
% name, so children only get a booktitle if the parent defines one.
@proceedings{BIS:2002/1,
  editor    = {Abramowicz, Witold},
  booktitle = {{BIS} 2002, 5th International Conference on Business Information Systems, {Pozna{\'n}}, {Poland}, 24--25 April 2002},
  title     = {{BIS} 2002, 5th International Conference on Business Information Systems, {Pozna{\'n}}, {Poland}, 24--25 April 2002},
  publisher = {Department of Information Systems, Pozna{\'n} University of Economics},
  isbn      = {83-916842-0-2},
  year      = {2002},
  bibsource = {BIS, http://bis.kie.ue.poznan.pl/biblio/}
}
BIS index BIS 2002
Dept. of Information Systems at Poznan University of Economics, Dominik Flejter