{"created":"2021-03-01T05:52:08.964873+00:00","id":407,"links":{},"metadata":{"_buckets":{"deposit":"c43fdbf4-cb97-4614-ab0f-e8c7643eb361"},"_deposit":{"id":"407","owners":[],"pid":{"revision_id":0,"type":"depid","value":"407"},"status":"published"},"_oai":{"id":"oai:repository.nii.ac.jp:00000407","sets":["136"]},"author_link":[],"control_number":"407","item_5_biblio_info_30":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2005-07-20","bibliographicIssueDateType":"Issued"},"bibliographicPageEnd":"13","bibliographicPageStart":"1","bibliographic_titles":[{"bibliographic_title":"NIIテクニカル・レポート","bibliographic_titleLang":"ja"},{"bibliographic_title":"NII Technical Report","bibliographic_titleLang":"en"}]}]},"item_5_description_28":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"Orthographic varieties are common in the Japanese language, and represent a serious problem for Japanese information retrieval (IR), as IR systems run the risk of missing documents that contain variant forms of the search term. We propose two different strategies for handling orthographic varieties: pronunciation or yomi-based indexing and \"Fuzzy Querying\", comparing katakana terms based on edit distance. Both strategies were integrated into our multiple index and fusion system, and tested using two different test collections, newspaper articles (Mainichi Shimbun '98) and scientific abstracts (NTCIR-1), to compare their performance across text genres. The fusion of the results obtained with a bi-gram-based, a word-based, and the additional yomi-based index was found to improve precision significantly for the NTCIR-1 collection, but only slightly for the Mainichi Shimbun '98 collection. Adding Fuzzy Querying as a fourth system and merging the results led to a further, but not significant, improvement in precision.","subitem_description_language":"en","subitem_description_type":"Abstract"}]},"item_5_identifier_registration":{"attribute_name":"ID登録","attribute_value_mlt":[{"subitem_identifier_reg_text":"10.20736/0000000407","subitem_identifier_reg_type":"JaLC"}]},"item_5_publisher_31":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"国立情報学研究所","subitem_publisher_language":"ja"}]},"item_5_source_id_32":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1346-5597","subitem_source_identifier_type":"ISSN"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Kummer, Nina","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"Womser-Hacker, Christa","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"神門, 典子","creatorNameLang":"ja"},{"creatorName":"Kando, Noriko","creatorNameLang":"en"}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2015-08-27"}],"displaytype":"detail","filename":"05-011E.pdf","filesize":[{"value":"348.1 kB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"NII Technical Report (NII-2005-011E):Handling Orthographic Varieties in Japanese Information Retrieval: Fusion of Word-, N-gram-, and Yomi-Based Indices across Different Document Collections","url":"https://repository.nii.ac.jp/record/407/files/05-011E.pdf"},"version_id":"37dd76a4-2170-4be7-912e-26f70f2f34e2"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"テクニカルレポート","subitem_subject_language":"ja","subitem_subject_scheme":"Other"},{"subitem_subject":"Technical Report","subitem_subject_language":"en","subitem_subject_scheme":"Other"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"departmental bulletin paper","resourceuri":"http://purl.org/coar/resource_type/c_6501"}]},"item_title":"NII Technical Report (NII-2005-011E):Handling Orthographic Varieties in Japanese Information Retrieval: Fusion of Word-, N-gram-, and Yomi-Based Indices across Different Document Collections","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"NII Technical Report (NII-2005-011E):Handling Orthographic Varieties in Japanese Information Retrieval: Fusion of Word-, N-gram-, and Yomi-Based Indices across Different Document Collections","subitem_title_language":"en"}]},"item_type_id":"5","owner":"1","path":["136"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2005-07-20"},"publish_date":"2005-07-20","publish_status":"0","recid":"407","relation_version_is_last":true,"title":["NII Technical Report (NII-2005-011E):Handling Orthographic Varieties in Japanese Information Retrieval: Fusion of Word-, N-gram-, and Yomi-Based Indices across Different Document Collections"],"weko_creator_id":"1","weko_shared_id":-1},"updated":"2022-12-27T07:57:38.303023+00:00"}