% Conference paper in the ER 2006 proceedings (Springer, Lecture Notes in Computer Science).
% Acronyms are brace-protected so that sentence-casing styles keep them upper-case.
@inproceedings{89436dc4ea024ef49821b1a8c6bdcdee,
  author    = {Lin, Zi and He, Bingsheng and Choi, Byron},
  title     = {A quantitative summary of {XML} structures},
  booktitle = {Conceptual Modeling -- {ER} 2006},
  editor    = {Embley, David W. and Oliv{\'e}, Antoni and Ram, Sudha},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Berlin Heidelberg},
  edition   = {First},
  pages     = {228--240},
  year      = {2006},
  month     = oct,
  day       = {24},
  doi       = {10.1007/11901181_18},
  isbn      = {354047224X},
  url       = {https://link.springer.com/book/10.1007/11901181},
  language  = {English},
  keywords  = {Support Ratio, Query Evaluation, Selectivity Estimation, Document Instance, Query Workload},
  abstract  = {Statistical summaries in relational databases mainly focus on the distribution of data values and have been found useful for various applications, such as query evaluation and data storage. As xml has been widely used, e.g. for online data exchange, the need for (corresponding) statistical summaries in xml has been evident. While relational techniques may be applicable to the data values in xml documents, novel techniques are required for summarizing the structures of xml documents. In this paper, we propose metrics for major structural properties, in particular, nestings of entities and one-to-many relationships, of XML documents. Our technique is different from the existing ones in that we generate a quantitative summary of an xml structure. By using our approach, we illustrate that some popular real-world and synthetic xml benchmark datasets are indeed highly skewed and hardly hierarchical and contain few recursions. We wish this preliminary finding sheds insight on improving the design of xml benchmarking and experimentations.},
  note      = {25th International Conference on Conceptual Modeling - ER 2006 ; Conference date: 06-11-2006 Through 09-11-2006},
}