نتایج جستجو برای: web data record extraction

تعداد نتایج: 2734823  

2004
Mark H. Butler John Gilbert Andy Seaborne Kevin Smathers

SIMILE is a joint project between MIT Libraries, MIT Computer Science and Artificial Intelligence Laboratory (CSAIL), HP Labs and the World Wide Web Consortium (W3C). It is investigating the application of Semantic Web tools, such as the Resource Description Framework (RDF), to the problem of dealing with heterogeneous metadata. This report describes how XML and RDF tools are used to perform da...

2011
Daiyue Weng Jun Hong David A. Bell

Web databases contain a large amount of structured data which are accessible via their query interfaces only. Query results are presented in dynamically generated web pages, usually in the form of data records, for human use. The problem of automatically extracting data records from query result pages is critical for web data integration applications, such as comparison shopping sites, meta-sea...

2012
A Suresh Babu

Web databases contain a huge amount of structured data which are easily obtained via their query interfaces only. Query results are presented in dynamically generated web pages, usually in the form of data records, for human use. Decisive for web data integration applications is the problem of automatically extracting data records from query result pages, such as comparison shopping sites, meta...

2009
Alexander Löser Steffen Lutter Patrick Düssel Volker Markl

We discuss the novel problem of supporting analytical business intelligence queries over web-based textual content, e.g., BI-style reports based on 100.000’s of documents from an ad-hoc web search result. Neither conventional search engines nor conventional Business Intelligence and ETL tools address this problem, which lies at the intersection of their capabilities. “Google Squared” or our sys...

Journal: :Bulletin of National Technical University "KhPI". Series: System Analysis, Control and Information Technologies 2018

2015
Chia-Hui Chang Chun-Nan Hsu

The explosive growth and popularity of the World Wide Web has resulted in a huge number of information sources on the Internet. However, due to the heterogeneity and the lack of structure of Web information sources, access to this huge collection of information has been limited to browsing and keyword searching. Sophisticated Webmining applications, such as comparison shopping, require expensiv...

2009
Alexander Löser Steffen Lutter Patrick Düssel Volker Markl

We discuss the novel problem of supporting analytical business intelligence queries over web-based textual content, e.g., BI-style reports based on 100.000’s of documents from an ad-hoc web search result. Neither conventional search engines nor conventional Business Intelligence and ETL tools address this problem, which lies at the intersection of their capabilities. “Google Squared” or our sys...

2006
Siddu P. Algur P S Hiremath

This paper studies the problem of identification and extraction of structured data items from the nested and flat records of given web pages. Each of such pages may contain several groups of structured records. Most of the existing methods still have certain limitations. In this paper, we propose a more novel and effective technique for the extraction of data items. Given a page, the proposed t...

2013
S. Ishwarya

Online data request and respond to a user query with result records are programmed in HTML files. Extracting information from the unstructured bases has matured into a significant technical challenge whereas generally, data extraction had to deal with changes in physical hardware plans, the majority of current data mining deals with extracting data from the unstructured data sources, and from d...

نمودار تعداد نتایج جستجو در هر سال

با کلیک روی نمودار نتایج را به سال انتشار فیلتر کنید

function paginate(evt) { url=/search_year_filter/ var term=document.getElementById("search_meta_data").dataset.term pg=parseInt(evt.target.text) var data={ "year":filter_year, "term":term, "pgn":pg } filtered_res=post_and_fetch(data,url) window.scrollTo(0,0); } function update_search_meta(search_meta) { meta_place=document.getElementById("search_meta_data") term=search_meta.term active_pgn=search_meta.pgn num_res=search_meta.num_res num_pages=search_meta.num_pages year=search_meta.year meta_place.dataset.term=term meta_place.dataset.page=active_pgn meta_place.dataset.num_res=num_res meta_place.dataset.num_pages=num_pages meta_place.dataset.year=year document.getElementById("num_result_place").innerHTML=num_res if (year !== "unfilter"){ document.getElementById("year_filter_label").style="display:inline;" document.getElementById("year_filter_place").innerHTML=year }else { document.getElementById("year_filter_label").style="display:none;" document.getElementById("year_filter_place").innerHTML="" } } function update_pagination() { search_meta_place=document.getElementById('search_meta_data') num_pages=search_meta_place.dataset.num_pages; active_pgn=parseInt(search_meta_place.dataset.page); document.getElementById("pgn-ul").innerHTML=""; pgn_html=""; for (i = 1; i <= num_pages; i++){ if (i===active_pgn){ actv="active" }else {actv=""} pgn_li="
  • " +i+ "
  • "; pgn_html+=pgn_li; } document.getElementById("pgn-ul").innerHTML=pgn_html var pgn_links = document.querySelectorAll('.mypgn'); pgn_links.forEach(function(pgn_link) { pgn_link.addEventListener('click', paginate) }) } function post_and_fetch(data,url) { showLoading() xhr = new XMLHttpRequest(); xhr.open('POST', url, true); xhr.setRequestHeader('Content-Type', 'application/json; charset=UTF-8'); xhr.onreadystatechange = function() { if (xhr.readyState === 4 && xhr.status === 200) { var resp = xhr.responseText; resp_json=JSON.parse(resp) resp_place = document.getElementById("search_result_div") resp_place.innerHTML = resp_json['results'] search_meta = resp_json['meta'] update_search_meta(search_meta) update_pagination() hideLoading() } }; xhr.send(JSON.stringify(data)); } function unfilter() { url=/search_year_filter/ var term=document.getElementById("search_meta_data").dataset.term var data={ "year":"unfilter", "term":term, "pgn":1 } filtered_res=post_and_fetch(data,url) } function deactivate_all_bars(){ var yrchart = document.querySelectorAll('.ct-bar'); yrchart.forEach(function(bar) { bar.dataset.active = false bar.style = "stroke:#71a3c5;" }) } year_chart.on("created", function() { var yrchart = document.querySelectorAll('.ct-bar'); yrchart.forEach(function(check) { check.addEventListener('click', checkIndex); }) }); function checkIndex(event) { var yrchart = document.querySelectorAll('.ct-bar'); var year_bar = event.target if (year_bar.dataset.active == "true") { unfilter_res = unfilter() year_bar.dataset.active = false year_bar.style = "stroke:#1d2b3699;" } else { deactivate_all_bars() year_bar.dataset.active = true year_bar.style = "stroke:#e56f6f;" filter_year = chart_data['labels'][Array.from(yrchart).indexOf(year_bar)] url=/search_year_filter/ var term=document.getElementById("search_meta_data").dataset.term var data={ "year":filter_year, "term":term, "pgn":1 } filtered_res=post_and_fetch(data,url) } } function showLoading() { document.getElementById("loading").style.display = "block"; setTimeout(hideLoading, 10000); // 10 seconds } function hideLoading() { document.getElementById("loading").style.display = "none"; } -->