getting started
# Empty the current workspace first, so that only the objects restored from
# the saved names data are present afterwards.
rm(list = ls())
load(file = "./data/names_df_v20221005.RData")
packages
library(data.table)
library(tidyverse)
require(xml2)
require(rvest)
require(devtools)
require(scholar)
require(stringi)
andere strategie. alle nobiliary particles er af. dan zonder deze
door databank. dan indien geen info, via particles NL-identiteit (of
spaans of Duits of arabisch)
# Work on a copy of the names data. Each last name is cut at its first comma
# into at most two pieces: the part before the comma goes to `lastname2`, the
# part after it to `np`.
lastname_df <- names_df
name_parts <- str_split(
  lastname_df$lastname,
  pattern = ",",
  n = 2,
  simplify = TRUE
)
lastname_df$lastname2 <- as.character(name_parts[, 1])
lastname_df$np <- as.character(name_parts[, 2])
# creating URLs: origin

#' Capitalise the first letter of each string and lower-case the rest.
#'
#' @param x Character vector (anything else is converted with
#'   `as.character()`).
#' @return Character vector of the same length as `x`. Missing values stay
#'   `NA` instead of being pasted together into the literal text "NANA".
flaname <- function(x) {
  x <- as.character(x)
  out <- paste0(
    toupper(substring(x, 1, 1)),
    tolower(substring(x, 2))
  )
  # paste0() turns NA into the text "NA"; restore the missing values.
  out[is.na(x)] <- NA_character_
  out
}
# Normalise the capitalisation of the bare last name.
lastname_df$lastname2 <- flaname(lastname_df$lastname2)

# Percent-encode the name before it is placed in the query string, so that
# spaces, apostrophes and non-ASCII letters do not produce a malformed URL.
# Missing names are skipped because URLencode() cannot handle NA.
name_for_url <- lastname_df$lastname2
has_name <- !is.na(name_for_url)
name_for_url[has_name] <- vapply(
  name_for_url[has_name],
  function(nm) utils::URLencode(enc2utf8(nm), reserved = TRUE),
  character(1),
  USE.NAMES = FALSE
)

# One lookup URL per row; the same name is sent as gba_naam and nfd_naam.
lastname_df$name_origin <- paste0(
  "https://www.cbgfamilienamen.nl/nfb/detail_naam.php?gba_naam=",
  name_for_url,
  "&nfd_naam=",
  name_for_url,
  "&info=analyse+en+verklaring&operator=eq&taal="
)
https://www.cbgfamilienamen.nl/nfb/detail_naam.php?gba_naam=tolsma&gba_naam=Tolsma&nfd_naam=&info=analyse+en+verklaring&operator=eq&taal=
#lastname_df$name_origin[14] https://www.cbgfamilienamen.nl/nfb/detail_naam.php?gba_naam=
Tolsma &nfd_naam= Tolsma
&info=analyse+en+verklaring&operator=eq&taal=
#https://www.cbgfamilienamen.nl/nfb/detail_naam.php?gba_lcnaam=kraaykamp&gba_naam=Kraaykamp&nfd_naam=Kraaijkamp%20(y)&operator=eq&taal=
hier slaan we alles op
# Containers for the scrape results, preallocated with one (initially NULL)
# slot per row of lastname_df. This keeps both lists as long as the data even
# when the last requests fail, and avoids growing them inside the loop.
name_originl <- vector("list", nrow(lastname_df))
table_originl <- vector("list", nrow(lastname_df))
# Pause handed to Sys.sleep() between two requests.
time <- 0.1
crucial scrape
loop
# Evaluate one scraping step. Warnings are printed and the step continues;
# errors are printed and NULL is returned, so one bad page never stops the run.
try_scrape <- function(expr) {
  tryCatch(
    withCallingHandlers(
      expr,
      warning = function(w) {
        cat("WARNING:", conditionMessage(w), "\n") # WARNING message
        invokeRestart("muffleWarning")
      }
    ),
    error = function(e) {
      cat("Error:", conditionMessage(e), "\n") # ERROR message
      NULL
    }
  )
}

# Download the origin page for every row of lastname_df and store the parsed
# page and its tables. Slot i always belongs to row i: a failed request leaves
# NULL in that slot instead of shifting or shortening the lists.
for (i in seq_len(nrow(lastname_df))) {
  print(i)
  Sys.sleep(time) # wait between requests
  page <- try_scrape(read_html(as.character(lastname_df$name_origin[i])))
  tables <- if (is.null(page)) NULL else try_scrape(html_table(page))
  # Assigning list(...) through `[` keeps the slot even when the value is NULL.
  name_originl[i] <- list(page)
  table_originl[i] <- list(tables)
}
en vanaf hier is het eigenlijk alleen maar opschonen.
# Full text of every downloaded page, one list element per page.
# Pages that could not be downloaded (NULL) yield NA instead of an error, and
# an empty name_originl simply gives an empty list.
origin_txt <- lapply(name_originl, function(page) {
  if (is.null(page)) {
    return(NA_character_)
  }
  page %>% html_text() %>% as.character()
})
# Get out the relevant origin information from the xml lists:
# the text of the third <div> of every page.
# A page that is missing (NULL) or has fewer than three <div> nodes yields NA,
# so origin_ln keeps exactly one entry per page.
origin_ln <- lapply(name_originl, function(page) {
  if (is.null(page)) {
    return(NA_character_)
  }
  div_text <- page %>% html_nodes("div") %>% rvest::html_text()
  if (length(div_text) < 3) {
    return(NA_character_)
  }
  div_text[[3]]
})
# Remove mess: replace every tab and every newline by a single space.
# lapply() also copes with an empty origin_ln, where indexing over
# 1:length(origin_ln) would fail with a subscript error.
origin_ln <- lapply(origin_ln, function(txt) {
  txt <- gsub("\\t", " ", txt)
  gsub("\\n", " ", txt)
})
# Flatten nested structure of the origin information
#origin_ln <- rbind(flatten(origin_ln))
# Split the cleaned page text into its "verklaring" (explanation),
# "kenmerken" (characteristics/tags) and "specifieke componenten" (specific
# components) parts and collect them, with id and last name, in `vk`.
#
# NOTE(review): str_extract_all() returns a list, and that list is passed
# straight into str_remove_all() / trimws(). The later comparisons against the
# literal text "character(0)" suggest that entries without a match end up as
# that string through implicit coercion - confirm with the installed stringr
# version before changing anything here.
# NOTE(review): unlist() drops NULL and zero-length entries, so `origin` may
# become shorter than lastname_df; data.frame() below presumably needs equal
# lengths - verify.

# Detaching the names and origin info for easier data handling
origin <- unlist(origin_ln)
# Keep the stretch of text that starts at "varianten" and ends at the first "©"
origin <- str_extract_all(origin, "varianten(.*?)©")
# Origin information is usually mentioned after "verklaring" or "kenmerken"
# Strip the boilerplate words that surround the actual information
origin <- str_remove_all(origin, "varianten")
origin <- str_remove_all(origin, "CBG Bronnen")
origin <- str_remove_all(origin, "catalogus")
origin <- str_remove_all(origin, "©")
# verklaring: the text with everything from "kenmerken:" to the end removed
verklaring <- str_remove_all(origin, "kenmerken:(.*?)$")
# kenmerken: everything from "kenmerken:" to the end, minus the trailing
# "specifieke componenten:" part
kenmerken <- str_extract_all(origin, "kenmerken:(.*?)$")
kenmerken <- str_remove_all(kenmerken, "specifieke componenten:(.*?)$")
sc <- str_extract_all(origin, "specifieke componenten:(.*?)$") # Not directly relevant to us, but does mean that the name has a webpage
# Make into a neat dataframe with the names attached
# Trim leading and trailing whitespace from all three pieces first
verklaring <- trimws(verklaring, which = "both")
kenmerken <- trimws(kenmerken, which = "both")
sc <- trimws(sc, which = "both")
# The first two columns are named after the expressions that create them
vk <- data.frame(lastname_df$id,lastname_df$lastname, verklaring, kenmerken, sc)
Separating names with
Dutch & unknown origin
Next, we identify those names for which no additional information was
found. This is important to distinguish Dutch names from names with
unknown origins.
- Dutch names: no label indicating that the name is Dutch, but some
other information available on name origin
- Unknown names: web page cannot be found, so origin information is
empty.
# Identify last names that could not be found

# TRUE where a scraped field carries no information: it is NA, empty, or the
# placeholder text "character(0)" that marks "no match" in these columns.
is_blank <- function(x) {
  is.na(x) | x %in% c("", "character(0)")
}

# no_info is 1 only when all three fields are blank. The emptiness of each
# field is tested directly; adding up string lengths would also flag a
# one-character explanation and would miss a blank "character(0)" verklaring.
# no_info is computed first, from the untouched columns.
vk <- vk %>%
  mutate(
    no_info = ifelse(
      is_blank(verklaring) & is_blank(kenmerken) & is_blank(sc), 1, 0
    ),
    verklaring = ifelse(is_blank(verklaring), NA, verklaring),
    kenmerken = ifelse(is_blank(kenmerken), NA, kenmerken),
    # sc keeps 0 as its "nothing found" marker, as before.
    sc = ifelse(is_blank(sc), 0, sc)
  )
# If there is no text in verklaring or kenmerken, the name could not be found in the databases.
LS0tDQp0aXRsZTogIjMuIE9yaWdpbiINCmF1dGhvcjogImJ5OiBOaW5hIEJyYW50ZW4iDQpiaWJsaW9ncmFwaHk6IHJlZmVyZW5jZXMuYmliDQotLS0NCg0KDQoNCmBgYHtyLCBpbnN0YWxsIHJlbW90ZWx5LCBnbG9iYWxzZXR0aW5ncywgZWNobz1GQUxTRSwgd2FybmluZz1GQUxTRSwgcmVzdWx0cz0naGlkZScsIGV2YWw9RkFMU0V9DQppbnN0YWxsLnBhY2thZ2VzKCJyZW1vdGVzIikNCnJlbW90ZXM6Omluc3RhbGxfZ2l0aHViKCJybGVzdXIva2xpcHB5IikNCmBgYCANCg0KYGBge3IsIGdsb2JhbHNldHRpbmdzLCBlY2hvPUZBTFNFLCB3YXJuaW5nPUZBTFNFLCByZXN1bHRzPSdoaWRlJ30NCmxpYnJhcnkoa25pdHIpDQpsaWJyYXJ5KHJnbCkNCg0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQ0Kb3B0c19jaHVuayRzZXQodGlkeS5vcHRzPWxpc3Qod2lkdGguY3V0b2ZmPTEwMCksdGlkeT1UUlVFLCB3YXJuaW5nID0gRkFMU0UsIG1lc3NhZ2UgPSBGQUxTRSxjb21tZW50ID0gIiM+IiwgY2FjaGU9VFJVRSwgY2xhc3Muc291cmNlPWMoInRlc3QiKSwgY2xhc3Mub3V0cHV0PWMoInRlc3QyIikpDQpvcHRpb25zKHdpZHRoID0gMTAwKQ0KcmdsOjpzZXR1cEtuaXRyKCkNCg0KDQoNCmNvbG9yaXplIDwtIGZ1bmN0aW9uKHgsIGNvbG9yKSB7c3ByaW50ZigiPHNwYW4gc3R5bGU9J2NvbG9yOiAlczsnPiVzPC9zcGFuPiIsIGNvbG9yLCB4KSB9DQoNCmBgYA0KDQpgYGB7ciBrbGlwcHksIGVjaG89RkFMU0UsIGluY2x1ZGU9VFJVRX0NCmtsaXBweTo6a2xpcHB5KHBvc2l0aW9uID0gYygndG9wJywgJ3JpZ2h0JykpDQoja2xpcHB5OjprbGlwcHkoY29sb3IgPSAnZGFya3JlZCcpDQoja2xpcHB5OjprbGlwcHkodG9vbHRpcF9tZXNzYWdlID0gJ0NsaWNrIHRvIGNvcHknLCB0b29sdGlwX3N1Y2Nlc3MgPSAnRG9uZScpDQpgYGANCg0KIyBnZXR0aW5nIHN0YXJ0ZWQNCg0KYGBge3IsIGV2YWw9RkFMU0V9DQojc3RhcnQgd2l0aCBjbGVhbiB3b3Jrc3BhY2UgDQpybShsaXN0PWxzKCkpDQpsb2FkKCIuL2RhdGEvbmFtZXNfZGZfdjIwMjIxMDA1LlJEYXRhIikNCmBgYA0KDQoNCg0KIyBwYWNrYWdlcw0KDQpgYGB7cn0NCmxpYnJhcnkoZGF0YS50YWJsZSkgDQpsaWJyYXJ5KHRpZHl2ZXJzZSkgDQpyZXF1aXJlKHhtbDIpDQpyZXF1aXJlKHJ2ZXN0KQ0KcmVxdWlyZShkZXZ0b29scykNCnJlcXVpcmUoc2Nob2xhcikNCnJlcXVpcmUoc3RyaW5naSkNCg0KYGBgDQoNCmFuZGVyZSBzdHJhdGVnaWUuIGFsbGUgbm9iaWxpYXJ5IHBhcnRpY2xlcyBlciBhZi4NCmRhbiB6b25kZXIgZGV6ZSBkb29yIGRhdGFiYW5rLiANCmRhbiBpbmRpZW4gZ2VlbiBpbmZvLCB2aWEgcGFydGljbGVzIE5MLWlkZW50aXRlaXQgKG9mIHNwYWFucyBvZiBEdWl0cyBvZiBhcmFiaXNjaCkNCg0KYGBge3IsIGV2YWw9RkFMU0V9DQpsYXN0bmFtZV9kZiA8LSBuYW1lc19kZg0KbGFzdG5hbWVfZGYkbGFzdG5hbWUyIDwtIGFzLmNoYXJhY3RlcihzdHJfc3BsaXQobGFzdG5hbWVf
ZGYkbGFzdG5hbWUsIHBhdHRlcm49IiwiLCBuID0gMiwgc2ltcGxpZnkgPSBUUlVFKVssMV0pDQpsYXN0bmFtZV9kZiRucCA8LSBhcy5jaGFyYWN0ZXIoc3RyX3NwbGl0KGxhc3RuYW1lX2RmJGxhc3RuYW1lLCBwYXR0ZXJuPSIsIiwgbiA9IDIsIHNpbXBsaWZ5ID0gVFJVRSlbLDJdKQ0KYGBgDQoNCg0KYGBge3IsIGV2YWw9RkFMU0V9DQojIGNyZWF0aW5nIFVSTHM6IG9yaWdpbg0KZmxhbmFtZSA8LSBmdW5jdGlvbih4KXsNCiAgICBwYXN0ZSh0b3VwcGVyKHN1YnN0cmluZyh4LCAxLCAxKSksDQogICAgICAgICAgdG9sb3dlcihzdWJzdHJpbmcoeCwgMiwgbmNoYXIoeCkpKSwNCiAgICAgICAgICBzZXAgPSAiIikNCn0NCmxhc3RuYW1lX2RmJGxhc3RuYW1lMiA8LSBmbGFuYW1lKGxhc3RuYW1lX2RmJGxhc3RuYW1lMikNCg0KbGFzdG5hbWVfZGYkbmFtZV9vcmlnaW4gPC0gcGFzdGUwKCJodHRwczovL3d3dy5jYmdmYW1pbGllbmFtZW4ubmwvbmZiL2RldGFpbF9uYWFtLnBocD9nYmFfbmFhbT0iLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBsYXN0bmFtZV9kZiRsYXN0bmFtZTIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICImbmZkX25hYW09IiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbGFzdG5hbWVfZGYkbGFzdG5hbWUyLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiJmluZm89YW5hbHlzZStlbit2ZXJrbGFyaW5nJm9wZXJhdG9yPWVxJnRhYWw9IikNCmBgYA0KDQpodHRwczovL3d3dy5jYmdmYW1pbGllbmFtZW4ubmwvbmZiL2RldGFpbF9uYWFtLnBocD9nYmFfbmFhbT10b2xzbWEmZ2JhX25hYW09VG9sc21hJm5mZF9uYWFtPSZpbmZvPWFuYWx5c2UrZW4rdmVya2xhcmluZyZvcGVyYXRvcj1lcSZ0YWFsPQ0KDQojbGFzdG5hbWVfZGYkbmFtZV9vcmlnaW5bMTRdDQpodHRwczovL3d3dy5jYmdmYW1pbGllbmFtZW4ubmwvbmZiL2RldGFpbF9uYWFtLnBocD9nYmFfbmFhbT0NCiAgVG9sc21hDQombmZkX25hYW09DQogIFRvbHNtYQ0KJmluZm89YW5hbHlzZStlbit2ZXJrbGFyaW5nJm9wZXJhdG9yPWVxJnRhYWw9DQoNCiNodHRwczovL3d3dy5jYmdmYW1pbGllbmFtZW4ubmwvbmZiL2RldGFpbF9uYWFtLnBocD9nYmFfbGNuYWFtPWtyYWF5a2FtcCZnYmFfbmFhbT1LcmFheWthbXAmbmZkX25hYW09S3JhYWlqa2FtcCUyMCh5KSZvcGVyYXRvcj1lcSZ0YWFsPQ0KDQoNCmhpZXIgc2xhYW4gd2UgYWxsZXMgb3ANCmBgYHtyLCBldmFsPUZBTFNFfQ0KbmFtZV9vcmlnaW5sIDwtIGxpc3QoKQ0KdGFibGVfb3JpZ2lubCA8LSBsaXN0KCkNCnRpbWUgPC0gMC4xDQpgYGANCg0KIyBjcnVjaWNhbCBzY3JhcGUgbG9vcA0KIA0KYGBge3IsIGV2YWw9RkFMU0V9DQoNCmZvciAoaSBpbiAxOm5yb3cobGFzdG5hbWVfZGYpKSB7DQogIHByaW50KGkpDQogIFN5cy5zbGVlcCh0aW1lKQ0KICB0cnlDYXRjaCh7IA0KICAgIG5hbWVfb3JpZ2lubFtbaV1dICA8LSByZWFkX2h0bWwobGFzdG5hbWVf
ZGZbaSwgYygibmFtZV9vcmlnaW4iKV0pDQogICAgdGFibGVfb3JpZ2lubFtbaV1dIDwtIG5hbWVfb3JpZ2lubFtbaV1dICU+JSBodG1sX3RhYmxlKCkNCiAgfSwgDQogICAgd2FybmluZyA9IGZ1bmN0aW9uKHcpIHsNCiAgICAgICAgY2F0KCJXQVJOSU5HOiIsIGNvbmRpdGlvbk1lc3NhZ2UodyksICJcbiIpICNXQVJOSU5HIG1lc3NhZ2UNCiAgICB9LA0KICAgIGVycm9yPWZ1bmN0aW9uKGUpew0KICAgICAgZXJyIDwtIGNvbmRpdGlvbk1lc3NhZ2UoZSkNCiAgICAgIGNhdCgiRXJyb3I6IiwgY29uZGl0aW9uTWVzc2FnZShlKSwgIlxuIikgI0VSUk9SIG1lc3NhZ2UNCiAgICB9ICANCiAgKQ0KfQ0KYGBgDQoNCmVuIHZhbmFmIGhpZXIgaXMgaGV0IGVpZ2VubGlqayBhbGxlZW4gbWFhciBvcHNjaG9uZW4uIA0KYGBge3IsIGV2YWw9RkFMU0V9DQpvcmlnaW5fdHh0IDwtIGxpc3QoKQ0KZm9yIChpIGluIDE6bGVuZ3RoKG5hbWVfb3JpZ2lubCkpIHsNCiAgICBvcmlnaW5fdHh0IFtbaV1dIDwtIG5hbWVfb3JpZ2lubFtbaV1dICU+JSBodG1sX3RleHQoKSAlPiUgYXMuY2hhcmFjdGVyKCkNCn0NCg0KYGBgDQoNCmBgYHtyLCBldmFsPUZBTFNFfQ0KIyBHZXQgb3V0IHRoZSByZWxldmFudCBvcmlnaW4gaW5mb3JtYXRpb24gZnJvbSB0aGUgeG1sIGxpc3RzDQpvcmlnaW5fbG4gPC0gbGlzdCgpDQoNCmZvciAoaSBpbiAxOmxlbmd0aChuYW1lX29yaWdpbmwpKSB7DQogIG9yaWdpbl9sbltbaV1dIDwtIG5hbWVfb3JpZ2lubFtbaV1dICU+JSBodG1sX25vZGVzKCJkaXYiKSAlPiUgcnZlc3Q6Omh0bWxfdGV4dCgpDQogIG9yaWdpbl9sbltbaV1dIDwtIG9yaWdpbl9sbltbaV1dW1szXV0NCn0NCg0KIyBSZW1vdmUgbWVzcw0KZm9yIChpIGluIDE6bGVuZ3RoKG9yaWdpbl9sbikpIHsNCiAgb3JpZ2luX2xuW1tpXV0gPC0gZ3N1YigiXFx0IiwgIiAiLCBvcmlnaW5fbG5bW2ldXSkNCiAgb3JpZ2luX2xuW1tpXV0gPC0gZ3N1YigiXFxuIiwgIiAiLCBvcmlnaW5fbG5bW2ldXSkNCn0NCg0KIyBGbGF0dGVuIG5lc3RlZCBzdHJ1Y3R1cmUgb2YgdGhlIG9yaWdpbiBpbmZvcm1hdGlvbg0KI29yaWdpbl9sbiA8LSByYmluZChmbGF0dGVuKG9yaWdpbl9sbikpDQoNCmBgYA0KDQoNCmBgYHtyIGV4dHJhY3RpbmctdmVya2xhcmluZy1rZW5tZXJrZW4sIGV2YWw9RkFMU0V9DQoNCiMgRGV0YWNoaW5nIHRoZSBuYW1lcyBhbmQgb3JpZ2luIGluZm8gZm9yIGVhc2llciBkYXRhIGhhbmRsaW5nDQpvcmlnaW4gPC0gdW5saXN0KG9yaWdpbl9sbikNCg0KDQoNCm9yaWdpbiA8LSBzdHJfZXh0cmFjdF9hbGwob3JpZ2luLCAidmFyaWFudGVuKC4qPynCqSIpDQoNCiMgT3JpZ2luIGluZm9ybWF0aW9uIGlzIHVzdWFsbHkgbWVudGlvbmVkIGFmdGVyICJ2ZXJrbGFyaW5nIiBvciAia2VubWVya2VuIg0Kb3JpZ2luIDwtIHN0cl9yZW1vdmVfYWxsKG9yaWdpbiwgInZhcmlhbnRlbiIpDQpvcmlnaW4gPC0gc3RyX3JlbW92ZV9hbGwob3JpZ2luLCAiQ0JHIEJyb25uZW4iKQ0Kb3JpZ2lu
IDwtIHN0cl9yZW1vdmVfYWxsKG9yaWdpbiwgImNhdGFsb2d1cyIpDQpvcmlnaW4gPC0gc3RyX3JlbW92ZV9hbGwob3JpZ2luLCAiwqkiKQ0KDQoNCnZlcmtsYXJpbmcgPC0gc3RyX3JlbW92ZV9hbGwob3JpZ2luLCAia2VubWVya2VuOiguKj8pJCIpDQprZW5tZXJrZW4gPC0gc3RyX2V4dHJhY3RfYWxsKG9yaWdpbiwgImtlbm1lcmtlbjooLio/KSQiKQ0Ka2VubWVya2VuIDwtIHN0cl9yZW1vdmVfYWxsKGtlbm1lcmtlbiwgInNwZWNpZmlla2UgY29tcG9uZW50ZW46KC4qPykkIikNCnNjIDwtIHN0cl9leHRyYWN0X2FsbChvcmlnaW4sICJzcGVjaWZpZWtlIGNvbXBvbmVudGVuOiguKj8pJCIpICMgTm90IGRpcmVjdGx5IHJlbGV2YW50IHRvIHVzLCBidXQgZG9lcyBtZWFuIHRoYXQgdGhlIG5hbWUgaGFzIGEgd2VicGFnZQ0KDQoNCiMgTWFrZSBpbnRvIGEgbmVhdCBkYXRhZnJhbWUgd2l0aCB0aGUgbmFtZXMgYXR0YWNoZWQNCnZlcmtsYXJpbmcgPC0gdHJpbXdzKHZlcmtsYXJpbmcsIHdoaWNoID0gImJvdGgiKQ0Ka2VubWVya2VuIDwtIHRyaW13cyhrZW5tZXJrZW4sIHdoaWNoID0gImJvdGgiKQ0Kc2MgPC0gdHJpbXdzKHNjLCB3aGljaCA9ICJib3RoIikNCnZrIDwtIGRhdGEuZnJhbWUobGFzdG5hbWVfZGYkaWQsbGFzdG5hbWVfZGYkbGFzdG5hbWUsIHZlcmtsYXJpbmcsIGtlbm1lcmtlbiwgc2MpDQoNCg0KYGBgDQoNCg0KDQojIFNlcGFyYXRpbmcgbmFtZXMgd2l0aCBEdXRjaCAmIHVua25vd24gb3JpZ2luDQpOZXh0LCB3ZSBpZGVudGlmeSB0aG9zZSBuYW1lcyBmb3Igd2hpY2ggbm8gYWRkaXRpb25hbCBpbmZvcm1hdGlvbiB3YXMgZm91bmQuIFRoaXMgaXMgaW1wb3J0YW50IHRvIGRpc3Rpbmd1aXNoIER1dGNoIG5hbWVzIGZyb20gbmFtZXMgd2l0aCB1bmtub3duIG9yaWdpbnMuDQoNCi0gRHV0Y2ggbmFtZXM6IG5vIGxhYmVsIGluZGljYXRpbmcgdGhhdCB0aGUgbmFtZSBpcyBEdXRjaCwgYnV0IHNvbWUgb3RoZXIgaW5mb3JtYXRpb24gYXZhaWxhYmxlIG9uIG5hbWUgb3JpZ2luDQotIFVua25vd24gbmFtZXM6IHdlYiBwYWdlIGNhbm5vdCBiZSBmb3VuZCwgc28gb3JpZ2luIGluZm9ybWF0aW9uIGlzIGVtcHR5LiANCg0KYGBge3Igb3JpZ2luLXVua25vd24sIGV2YWw9RkFMU0V9DQoNCiMgSWRlbnRpZnkgbGFzdCBuYW1lcyB0aGF0IGNvdWxkIG5vdCBiZSBmb3VuZA0KdmsgPC0gdmsgJT4lDQogIG11dGF0ZSh2ZXJrbGFyaW5nID0gaWZlbHNlKHZlcmtsYXJpbmc9PSIiLCAwLCB2ZXJrbGFyaW5nKSwgDQogICAgICAgICBrZW5tZXJrZW4gPSBpZmVsc2Uoa2VubWVya2VuPT0iY2hhcmFjdGVyKDApIiwgMCwga2VubWVya2VuKSwNCiAgICAgICAgIHNjID0gaWZlbHNlKHNjPT0iY2hhcmFjdGVyKDApIiwgMCwgc2MpLA0KICAgICAgICAgbm9faW5mbyA9IG5jaGFyKHZlcmtsYXJpbmcpICsgbmNoYXIoa2VubWVya2VuKSArIG5jaGFyKHNjKSkNCg0KdmsgPC0gdmsgJT4lDQogIG11dGF0ZShub19pbmZvID0gaWZlbHNlKG5vX2luZm89PTMs
IDEsIDApLCANCiAgICAgICAgIHZlcmtsYXJpbmcgPSBpZmVsc2UodmVya2xhcmluZz09MCwgTkEsIHZlcmtsYXJpbmcpLA0KICAgICAgICAga2VubWVya2VuID0gaWZlbHNlKGtlbm1lcmtlbj09MCwgTkEsIGtlbm1lcmtlbikpDQojIElmIHRoZXJlIGlzIG5vIHRleHQgaW4gdmVya2xhcmluZyBvciBrZW5tZXJrZW4sIHRoZSBuYW1lIGNvdWxkIG5vdCBiZSBmb3VuZCBpbiB0aGUgZGF0YWJhc2VzLiANCmBgYA0KDQoNCiMgRXh0cmFjdGluZyBzcGVjaWZpYyBvcmlnaW4gaW5mb3JtYXRpb24NClRoZXJlIGFyZSB0aHJlZSBtYWluIHdheXMgdG8gZ2V0IGluZm9ybWF0aW9uIGFib3V0IHRoZSBvcmlnaW4gb2YgbGFzdCBuYW1lczoNCg0KMSkgVW5kZXIgImtlbm1lcmtlbiIsIGxhc3QgbmFtZXMgYXJlIGFzc2lnbmVkIGNsaWNrYWJsZSB0YWdzLiBUaGVzZSB0YWdzIGluY2x1ZGUgdW5zcGVjaWZpZWQgZm9yZWlnbiBuYW1lIHRhZ3MgKCJhbmRlcmUgdGFhbCIpLCBhcyB3ZWxsIGFzIHNwZWNpZmljIGZvcmVpZ24gb3JpZ2lucyBvZiB0aGUgbmFtZSAoIkZyYW5zZSBuYWFtIiwgIkluZGlzY2hlIG5hYW0iKS4NCi0+IG9yaWdpbjEgKyBvcmlnaW40DQoNCjIpIFNldmVyYWwgbmFtZXMgaGF2ZSBtb3JlIGV4dGVuc2l2ZWx5IHdyaXR0ZW4gb3V0IHN0b3JpZXMgYmVoaW5kIHRoZSBuYW1lLCB1bmRlciAidmVya2xhcmluZyIuIEEgbnVtYmVyIG9mIG5hbWVzIGNvbnRhaW4gZGV0YWlsZWQgKGVpdGhlciBjb3VudHJ5LWxldmVsIG9yIHJlZ2lvbmFsKSBvcmlnaW5zLCB1c3VhbGx5IGluIHRoZSBmb3JtIG9mICJEZSBuYWFtIFt4eXpdIGlzIGFma29tc3RpZyB1aXQgW2NvdW50cnldIi4gDQotPiBvcmlnaW4yIA0KDQozKSBTb21lIG5hbWVzIGhhdmUgb3JpZ2luIGluZm9ybWF0aW9uIHVuZGVyICJ2ZXJrbGFyaW5nIiBpbiB0aGUgZm9ybSBvZiB0aGUgbGluZ3Vpc3RpYyBvcmlnaW5zIG9mIHRoZSBuYW1lLiBUaGlzIGNhbiBiZSBjb3VudHJ5IHNwZWNpZmljIChlLmcuIENoaW5lc2UgbmFtZSksIGJ1dCBpdCBjYW4gYWxzbyBhcHBseSB0byBtdWx0aXBsZSBjb3VudHJpZXMgd2hlbiB0aGUgbGFuZ3VhZ2UgaXMgc3Bva2VuIGluIG1vcmUgdGhhbiAxIGNvdW50cmllcyAoZS5nLiBTcGFuaXNoIG5hbWUpLiANCi0+IG9yaWdpbjMgDQoNCkluIGhldCBzY3JpcHQgaGllcm9uZGVyIGdhYXQgZXJnZW5zIGlldHMgZm91dC4gRGFhcm9tIGhlYiBpayBldG5pY2l0ZWl0IHVpdGVpbmRlbGlqayBtZXQgZGUgaGFuZCB1aXRnZXJla2VuZC4NCg0KYGBge3IgY291bnRyaWVzLWV4dHJhY3QsIGV2YWw9RkFMU0V9DQoNCiMgU3RlcCAxOiBleHRyYWN0aW5nIG9yaWdpbiB0YWdzIGZyb20ga2VubWVya2VuDQp2ayA8LSB2ayAlPiUNCiAgbXV0YXRlKG9yaWdpbjEgPSBzdHJfZXh0cmFjdChrZW5tZXJrZW4sICJbOnVwcGVyOl0oWzpsb3dlcjpdezIsfSkgbmFhbSIpKQ0KDQojIE5vdGU6IHNvbWV0aW1lcyBtdWx0aXBsZSBvcmlnaW5zIGFyZSBtZW50aW9uZWQuIEN1cnJlbnRs
eSwgSSBvbmx5IGV4dHJhY3QgdGhlIGZpcnN0IG9uZS4gT3RoZXJ3aXNlLCB3ZSBzaG91bGQgdXNlIHN0cl9leHRyYWN0X2FsbC4gDQoNCg0KDQojIFN0ZXAgMjogZXh0cmFjdGluZyBvcmlnaW4gaW5mbyBmcm9tIHZlcmtsYXJpbmcgDQp2ayA8LSB2ayAlPiUNCiAgbXV0YXRlKG9yaWdpbjIgPSBpZmVsc2UoYXMubnVtZXJpYyhzdHJfZGV0ZWN0KHZlcmtsYXJpbmcsICJhZmtvbXN0aWcgdWl0IikpID09IDEsIA0KICAgICAgICAgc3RyX3JlbW92ZSh2ZXJrbGFyaW5nLCAiLiphZmtvbXN0aWcgdWl0IiksIE5BKSkNCg0KDQojIFN0ZXAgMzogZXh0cmFjdGluZyBhZGRpdGlvbmFsIG9yaWdpbiBpbmZvIGZyb20gdmVya2xhcmluZw0KdmsgPC0gdmsgJT4lDQogIG11dGF0ZShvcmlnaW4zID0gc3RyX2V4dHJhY3QodmVya2xhcmluZywgIls6dXBwZXI6XShbOmxvd2VyOl17Mix9KSAoYWNodGVyKT8oZmFtaWxpZSk/KGJlcm9lcHMpP25hYW0iKSkNCg0KDQoNCiMgRmluYWxseSwgd2UgY2xlYW4gdXAgdGhlIG9yaWdpbiBpbmZvcm1hdGlvbiBleHRyYWN0ZWQgYWJvdmUNCg0KIyBPcmlnaW4xOiBhbHJlYWR5IG5lYXQNCnZrJG9yaWdpbjEgPC0gc3RyX3JlbW92ZSh2ayRvcmlnaW4xLCAiSm9vZHNlIG5hYW0iKSAjIGNhbiBiZSBEdXRjaCAmIG5vbi1EdXRjaA0KDQojIE9yaWdpbjI6IG1lc3N5DQp2ayRvcmlnaW4yIDwtIHN0cl9yZW1vdmUodmskb3JpZ2luMiwgIlxcLi4qIikgIyByZW1vdmUgZXh0cmEgaW5mbyBpbiB0aGUgZm9sbG93aW5nIHNlbnRlbmNlIA0Kdmskb3JpZ2luMiA8LSBzdHJfcmVtb3ZlKHZrJG9yaWdpbjIsICJcXDsuKiIpICMgcmVtb3ZlIGV4dHJhIGluZm8gaW4gdGhlIGZvbGxvd2luZyBzZW50ZW5jZSANCnZrJG9yaWdpbjIgPC0gc3RyX3JlbW92ZSh2ayRvcmlnaW4yLCAiXFwoLioiKSAjIHJlbW92ZSBleHRyYSBpbmZvIGluIHRoZSBmb2xsb3dpbmcgc2VudGVuY2UgDQoNCg0KdmskZHBnIDwtIGFzLm51bWVyaWMoc3RyX2RldGVjdCh2ayRvcmlnaW4yLCAiKGRvcnApfChwbGFhdHMpfChnZW1lZW50ZSl8KGdyYWFmc2NoYXApfChzdGFkKXwoZGVlbCl8KEZyaWVzbGFuZCkiKSkgIyBvcmlnaW4gaW5mbyB0b28gcmVnaW9uYWwgDQp2ayA8LSB2ayAlPiUgbXV0YXRlKG9yaWdpbjIgPSBpZmVsc2UoKGRwZz09MSksIE5BLCBvcmlnaW4yKSkgIyByZW1vdmluZyByZWdpb25hbCBvcmlnaW4gaW5mbw0KdmsgPC0gc3Vic2V0KHZrLCBzZWxlY3QgPSAtZHBnKSAjIHJlbW92aW5nIGludGVybWVkaWF0ZSB2YXJpYWJsZQ0KDQojIFNvbWV0aW1lcywgdGhlcmUgd2VyZSBtdWx0aXBsZSBjb3VudHJpZXMgbWVudGlvbmVkLiBUYWtlIG9ubHkgdGhlIGZpcnN0Og0Kdmskb3JpZ2luMiA8LSBzdHJfcmVtb3ZlKHZrJG9yaWdpbjIsICJcXCwuKiIpICMgT25seSBmaXJzdA0Kdmskb3JpZ2luMiA8LSBzdHJfcmVtb3ZlKHZrJG9yaWdpbjIsICJcXHMoZW4pLioiKSAjIE9ubHkgZmlyc3QgDQp2ayRvcmlnaW4yIDwtIHN0cl9yZW1vdmUodmskb3JpZ2lu
MiwgIlxccyhvZikuKiIpICMgT25seSBmaXJzdCANCg0KDQojIE9yaWdpbjM6IHByZXR0eSBuZWF0DQp2ayRvcmlnaW4zIDwtIHN0cl9yZW1vdmUodmskb3JpZ2luMywgIkQoaSk/ZSh6ZSk/IChmYW1pbGllKT8oYWNodGVyKT8oYmVyb2Vwcyk/bmFhbSIpICMgc2xpcHBlZCB0aHJvdWdoIHRoZSByZWdleA0Kdmskb3JpZ2luMyA8LSBzdHJfcmVtb3ZlKHZrJG9yaWdpbjMsICJFZW4gKGZhbWlsaWUpPyhhY2h0ZXIpPyhiZXJvZXBzKT9uYWFtIikgIyBzbGlwcGVkIHRocm91Z2ggdGhlIHJlZ2V4DQp2ayRvcmlnaW4zIDwtIHN0cl9yZW1vdmUodmskb3JpZ2luMywgIlppam4gKGZhbWlsaWUpPyhhY2h0ZXIpPyhiZXJvZXBzKT9uYWFtIikgIyBzbGlwcGVkIHRocm91Z2ggdGhlIHJlZ2V4DQp2ayRvcmlnaW4zIDwtIHN0cl9yZW1vdmUodmskb3JpZ2luMywgIkFscyAoZmFtaWxpZSk/KGFjaHRlcik/KGJlcm9lcHMpP25hYW0iKSAjIHNsaXBwZWQgdGhyb3VnaCB0aGUgcmVnZXgNCnZrJG9yaWdpbjMgPC0gc3RyX3JlbW92ZSh2ayRvcmlnaW4zLCAiSm9vZHNlIChmYW1pbGllKT8oYWNodGVyKT9uYWFtIikNCnZrJG9yaWdpbjMgPC0gc3RyX3JlbW92ZSh2ayRvcmlnaW4zLCAiQmlqYmVsc2UgKGZhbWlsaWUpPyhhY2h0ZXIpP25hYW0iKQ0KDQoNCiMgU2V0dGluZyBlbXB0eSBvcmlnaW4gdmFyaWFibGVzIHRvIE5BIChEdXRjaCBvciB1bmZvdW5kIGZvcmVpZ24pDQp2ayA8LSB2ayAlPiUNCiAgbXV0YXRlKG9yaWdpbjEgPSBhcy5jaGFyYWN0ZXIoaWZlbHNlKG9yaWdpbjE9PSIifG9yaWdpbjE9PSJjaGFyYWN0ZXIoMCkiLCBOQSwgb3JpZ2luMSkpLA0KICAgICAgICAgb3JpZ2luMiA9IGFzLmNoYXJhY3RlcihpZmVsc2Uob3JpZ2luMj09IiJ8b3JpZ2luMj09ImNoYXJhY3RlcigwKSIsIE5BLCBvcmlnaW4yKSksDQogICAgICAgICBvcmlnaW4zID0gYXMuY2hhcmFjdGVyKGlmZWxzZShvcmlnaW4zPT0iInxvcmlnaW4zPT0iY2hhcmFjdGVyKDApIiwgTkEsIG9yaWdpbjMpKSkNCg0KDQoNCiMgRmluYWxseSwgdGhlIHRhZyAiYW5kZXJlIHRhYWwiIHdhcyB1c2VkIHRvIGRpc3Rpbmd1aXNoIGZvcmVpZ24gbmFtZXMgb2YgdW5rbm93biBvcmlnaW4gZnJvbSBrbm93biBEdXRjaCBuYW1lcy4gDQp2ayA8LSB2ayAlPiUNCiAgbXV0YXRlKG9yaWdpbjQgPSBpZmVsc2UoKGFzLm51bWVyaWMoc3RyX2RldGVjdChrZW5tZXJrZW4sICJhbmRlcmUgdGFhbCIpKT09MSksICJub24tRHV0Y2giLCBOQSkpDQoNCnZvb3J2b2Vnc2Vsc25sIDwtIGMoIid0IiwgImQnIiwgImRlIiwgImRlIGxhIiwgImRlbiIsICJkZWwiLCAiZGVyIiwgImRlcyIsICJpbiAndCIsICJvcCBkZSIsICJvcCBkZW4iLCAidGVuIiwgInRlciIsICJ0ZXMiLCAidmFuIiwgInZhbiAndCIsICJ2YW4gZGUiICwgInZhbiBkZXIiLCAidmFuIGRlbiIpIA0KDQp2ayRubCA8LSAwDQoNCmZvciAoaSBpbiAxOiBsZW5ndGgodmskbGFzdG5hbWVfZGYubGFzdG5hbWUpKSB7DQogIGlmIChzdW0oc3RyX2RldGVj
dChsYXN0bmFtZV9kZiRucFtpXSwgdm9vcnZvZWdzZWxzbmwpKT4wKSB7DQogICAgdmskbmxbaV0gPC0gMQ0KICB9DQp9DQoNCnZrJGR1dGNoIDwtIGlmZWxzZSgoKHZrJG5vX2luZm89PTEgfCB2ayRub19pbmZvPT0ibm9uX0R1dGNoIikgJiAhdmskbmw9PTEpLCAwLCAxKQ0KbmFtZXModmspDQp2ayAlPiUgc2VsZWN0KGMoImxhc3RuYW1lX2RmLmlkIiwgImxhc3RuYW1lX2RmLmxhc3RuYW1lIiwgImR1dGNoIikpDQoNCmBgYA0KDQpgYGB7ciwgZXZhbD1GQUxTRX0NCnNhdmUodmssIGZpbGU9InZrMjAyMjEwMDYuUkRhdGEiKSANCmBgYA0KDQpgYGB7ciwgZXZhbD1GQUxTRX0NCm5hbWVzX2RmJGR1dGNoIDwtIHZrJGR1dGNoDQpgYGANCg0KDQpgYGB7ciwgZXZhbD1GQUxTRX0NCnNhdmUobmFtZXNfZGYsIGZpbGU9Im5hbWVzX2RmX3YyMDIyMTAwNi5SRGF0YSIpIA0KYGBgDQoNCg0KSWsgem91IGFsbGVzIHdhYXIgYG5vX2luZm9gIG9wIDEgc3RhYXQgb2Ygd2FhciBgb3JpZ2luNGAgb3AgIm5vbi1EdXRjaCIgc3RhYXQgY29kZXJlbiBhbHMgYnVpdGVubGFuZHMhIA0KDQoNCg==