1 getting started

# Empty the global environment, then load the saved input data.
# The code further down expects this file to provide `names_df`.
rm(list = ls())
load(file = "./data/names_df_v20221005.RData")

2 packages

library(data.table) 
library(tidyverse) 
require(xml2)
require(rvest)
require(devtools)
require(scholar)
require(stringi)

andere strategie. alle nobiliary particles er af. dan zonder deze door databank. dan indien geen info, via particles NL-identiteit (of spaans of Duits of arabisch)

# Work on a copy of the input and split every "lastname" value at its first
# comma: the text before the comma goes to `lastname2`, the text after it
# (empty when there is no comma) goes to `np`.
lastname_df <- names_df
name_parts <- str_split(
  lastname_df$lastname,
  pattern = ",",
  n = 2,
  simplify = TRUE
)
lastname_df$lastname2 <- as.character(name_parts[, 1])
lastname_df$np <- as.character(name_parts[, 2])
# creating URLs: origin
# Capitalise the first character of each string and lower-case the rest.
#
# x: character vector (other types are converted with as.character()).
# Returns a character vector of the same length as `x`; missing values stay
# NA instead of becoming the literal text "NANA".
flaname <- function(x) {
  x <- as.character(x)
  out <- paste0(
    toupper(substring(x, 1, 1)),
    tolower(substring(x, 2, nchar(x)))
  )
  # paste0() pastes NA as the text "NA", so restore real missing values.
  out[is.na(x)] <- NA_character_
  out
}
# Normalise the capitalisation of the surname (first letter upper, rest lower).
lastname_df$lastname2 <- flaname(lastname_df$lastname2)

# Percent-encode the surname before it is pasted into the query string, so
# that spaces, apostrophes and non-ASCII letters still give a valid URL.
# Missing names are passed through untouched because URLencode() cannot
# handle NA; vapply() keeps this working where URLencode() is not vectorised.
name_query <- vapply(
  lastname_df$lastname2,
  function(nm) {
    if (is.na(nm)) nm else utils::URLencode(nm, reserved = TRUE)
  },
  character(1),
  USE.NAMES = FALSE
)

# One lookup URL per surname; the same name fills both query parameters.
lastname_df$name_origin <- paste0(
  "https://www.cbgfamilienamen.nl/nfb/detail_naam.php?gba_naam=",
  name_query,
  "&nfd_naam=",
  name_query,
  "&info=analyse+en+verklaring&operator=eq&taal="
)

https://www.cbgfamilienamen.nl/nfb/detail_naam.php?gba_naam=tolsma&gba_naam=Tolsma&nfd_naam=&info=analyse+en+verklaring&operator=eq&taal=

#lastname_df$name_origin[14] https://www.cbgfamilienamen.nl/nfb/detail_naam.php?gba_naam= Tolsma &nfd_naam= Tolsma &info=analyse+en+verklaring&operator=eq&taal=

#https://www.cbgfamilienamen.nl/nfb/detail_naam.php?gba_lcnaam=kraaykamp&gba_naam=Kraaykamp&nfd_naam=Kraaijkamp%20(y)&operator=eq&taal=

hier slaan we alles op

# Containers filled by the scrape loop: the HTML tables found on each page
# and the parsed pages themselves.
table_originl <- list()
name_originl <- list()
# Pause between two requests, in seconds (handed to Sys.sleep()).
time <- 0.1

3 crucial scrape loop

# Download and parse the page behind every URL in lastname_df$name_origin.
# name_originl[[i]] receives the parsed page and table_originl[[i]] the tables
# found on it. seq_len() makes the loop a no-op for an empty data frame.
for (i in seq_len(nrow(lastname_df))) {
  print(i)
  Sys.sleep(time)
  # Reserve slot i up front: a failed request then leaves an explicit NULL and
  # both lists keep one entry per row of lastname_df.
  name_originl[i] <- list(NULL)
  table_originl[i] <- list(NULL)
  withCallingHandlers(
    tryCatch(
      {
        # `$name_origin[[i]]` yields a plain string for data frames, tibbles
        # and data.tables alike.
        page <- read_html(lastname_df$name_origin[[i]])
        name_originl[[i]] <- page
        table_originl[[i]] <- html_table(page)
      },
      error = function(e) {
        cat("Error:", conditionMessage(e), "\n") # ERROR message
      }
    ),
    # Log warnings without unwinding, so a warning no longer discards a page
    # that was otherwise read successfully.
    warning = function(w) {
      cat("WARNING:", conditionMessage(w), "\n") # WARNING message
      invokeRestart("muffleWarning")
    }
  )
}

en vanaf hier is het eigenlijk alleen maar opschonen.

# Pull the text out of every scraped page.
#   origin_txt[[i]]: full text of page i
#   origin_ln[[i]]:  text of the third <div> of page i
# Both lists get exactly one entry per row of lastname_df. Pages that are
# missing (failed request) or have fewer than three <div> elements yield NA
# instead of stopping the script or shifting later entries.
n_names <- nrow(lastname_df)
origin_txt <- vector("list", n_names)
origin_ln <- vector("list", n_names)

for (i in seq_len(n_names)) {
  page <- if (i <= length(name_originl)) name_originl[[i]] else NULL
  if (is.null(page)) {
    origin_txt[[i]] <- NA_character_
    origin_ln[[i]] <- NA_character_
    next
  }
  origin_txt[[i]] <- page %>% html_text() %>% as.character()
  # Get out the relevant origin information from the xml lists
  div_text <- page %>% html_nodes("div") %>% rvest::html_text()
  origin_ln[[i]] <- if (length(div_text) >= 3) div_text[[3]] else NA_character_
}

# Remove mess: replace tabs and newlines by spaces. lapply() also copes with
# an empty list, where the former 1:length() index loop failed.
origin_ln <- lapply(origin_ln, function(txt) {
  gsub("\\n", " ", gsub("\\t", " ", txt))
})

# Flatten nested structure of the origin information
# origin_ln <- rbind(flatten(origin_ln))
# Detaching the names and origin info for easier data handling:
# collapse the list into a plain character vector.
origin <- unlist(origin_ln)



# Keep the text between "varianten" and the copyright sign, then strip those
# delimiters and the site boilerplate.
# NOTE: str_extract_all() returns a list and the string functions applied to
# it afterwards coerce it back to character; the code further down tests for
# the literal text "character(0)" that this produces for entries without a
# match, so the order of these calls matters.
origin <- origin %>%
  str_extract_all("varianten(.*?)©") %>%
  str_remove_all("varianten") %>%
  str_remove_all("CBG Bronnen") %>%
  str_remove_all("catalogus") %>%
  str_remove_all("©")

# Origin information is usually mentioned after "verklaring" or "kenmerken".
# verklaring: everything before "kenmerken:".
verklaring <- origin %>%
  str_remove_all("kenmerken:(.*?)$") %>%
  trimws(which = "both")

# kenmerken: from "kenmerken:" up to (not including) "specifieke componenten:".
kenmerken <- origin %>%
  str_extract_all("kenmerken:(.*?)$") %>%
  str_remove_all("specifieke componenten:(.*?)$") %>%
  trimws(which = "both")

# sc: from "specifieke componenten:" to the end. Not directly relevant to us,
# but does mean that the name has a webpage.
sc <- origin %>%
  str_extract_all("specifieke componenten:(.*?)$") %>%
  trimws(which = "both")

# Make into a neat dataframe with the names attached
vk <- data.frame(lastname_df$id, lastname_df$lastname, verklaring, kenmerken, sc)

4 Separating names with Dutch & unknown origin

Next, we identify those names for which no additional information was found. This is important to distinguish Dutch names from names with unknown origins.

  • Dutch names: no label indicating that the name is Dutch, but some other information available on name origin
  • Unknown names: web page cannot be found, so origin information is empty.
# Identify last names that could not be found.
# A field counts as blank when it is "" or the literal text "character(0)"
# (what an extraction without a match turns into once coerced to character).
# Previously verklaring was only tested against "", so names for which nothing
# at all was extracted were never flagged.
is_blank_field <- function(x) {
  x == "" | x == "character(0)"
}

vk <- vk %>%
  mutate(
    # 1 = none of the three fields holds any text, 0 = at least one does.
    no_info = as.numeric(
      is_blank_field(verklaring) &
        is_blank_field(kenmerken) &
        is_blank_field(sc)
    ),
    verklaring = ifelse(is_blank_field(verklaring), NA, verklaring),
    kenmerken = ifelse(is_blank_field(kenmerken), NA, kenmerken),
    # sc keeps its "0" placeholder for blank values.
    sc = ifelse(is_blank_field(sc), 0, sc)
  )
# If there is no text in verklaring or kenmerken, the name could not be found
# in the databases.

5 Extracting specific origin information

There are three main ways to get information about the origin of last names:

  1. Under “kenmerken”, last names are assigned clickable tags. These tags include unspecified foreign name tags (“andere taal”), as well as specific foreign origins of the name (“Franse naam”, “Indische naam”). -> origin1 + origin4

  2. Several names have more extensively written out stories behind the name, under “verklaring”. A number of names contain detailed (either country-level or regional) origins, usually in the form of “De naam [xyz] is afkomstig uit [country]”. -> origin2

  3. Some names have origin information under “verklaring” in the form of the linguistic origins of the name. This can be country-specific (e.g. Chinese name), but it can also apply to multiple countries when the language is spoken in more than one country (e.g. Spanish name). -> origin3

In het script hieronder gaat ergens iets fout. Daarom heb ik etniciteit uiteindelijk met de hand uitgerekend.

# Derive three raw origin columns in one pass:
#   origin1 (step 1): first "<Capitalised word> naam" tag found in kenmerken.
#     Sometimes multiple origins are mentioned; only the first one is taken
#     (str_extract_all would return all of them).
#   origin2 (step 2): whatever follows "afkomstig uit" in verklaring, NA when
#     that phrase does not occur.
#   origin3 (step 3): first "<Capitalised word> (achter|familie|beroeps)naam"
#     phrase found in verklaring.
vk <- vk %>%
  mutate(
    origin1 = str_extract(kenmerken, "[:upper:]([:lower:]{2,}) naam"),
    origin2 = ifelse(
      str_detect(verklaring, "afkomstig uit"),
      str_remove(verklaring, ".*afkomstig uit"),
      NA
    ),
    origin3 = str_extract(
      verklaring,
      "[:upper:]([:lower:]{2,}) (achter)?(familie)?(beroeps)?naam"
    )
  )



# Finally, we clean up the origin information extracted above

# Origin1: already neat
vk$origin1 <- str_remove(vk$origin1, "Joodse naam") # can be Dutch & non-Dutch

# Origin2: messy. Cut everything from the first full stop, semicolon or
# opening bracket onwards (extra info in the following sentence).
vk$origin2 <- str_remove(vk$origin2, "\\..*")
vk$origin2 <- str_remove(vk$origin2, "\\;.*")
vk$origin2 <- str_remove(vk$origin2, "\\(.*")

# Origin info that names a village, town, municipality etc. is too regional
# to be useful, so it is dropped.
too_regional <- str_detect(
  vk$origin2,
  "(dorp)|(plaats)|(gemeente)|(graafschap)|(stad)|(deel)|(Friesland)"
)
vk$origin2 <- ifelse(too_regional, NA, vk$origin2)

# Sometimes, there were multiple countries mentioned. Take only the first:
vk$origin2 <- str_remove(vk$origin2, "\\,.*")
# The word boundary makes sure only the conjunctions "en" / "of" cut the text,
# not longer words that merely start with those letters.
vk$origin2 <- str_remove(vk$origin2, "\\s(en|of)\\b.*")
# The text after "afkomstig uit" starts with a space; trim it so that
# whitespace-only leftovers are recognised as empty below.
vk$origin2 <- str_trim(vk$origin2)

# Origin3: pretty neat. Remove generic phrases that slipped through the regex.
vk$origin3 <- str_remove(vk$origin3, "D(i)?e(ze)? (familie)?(achter)?(beroeps)?naam")
vk$origin3 <- str_remove(vk$origin3, "Een (familie)?(achter)?(beroeps)?naam")
vk$origin3 <- str_remove(vk$origin3, "Zijn (familie)?(achter)?(beroeps)?naam")
vk$origin3 <- str_remove(vk$origin3, "Als (familie)?(achter)?(beroeps)?naam")
vk$origin3 <- str_remove(vk$origin3, "Joodse (familie)?(achter)?naam")
vk$origin3 <- str_remove(vk$origin3, "Bijbelse (familie)?(achter)?naam")

# Setting empty origin variables to NA (Dutch or unfound foreign)
vk <- vk %>%
  mutate(
    origin1 = as.character(ifelse(origin1 == "" | origin1 == "character(0)", NA, origin1)),
    origin2 = as.character(ifelse(origin2 == "" | origin2 == "character(0)", NA, origin2)),
    origin3 = as.character(ifelse(origin3 == "" | origin3 == "character(0)", NA, origin3))
  )



# The tag "andere taal" in kenmerken marks a foreign name of unknown origin:
# origin4 is "non-Dutch" where the tag occurs and NA otherwise.
vk$origin4 <- ifelse(
  str_detect(vk$kenmerken, "andere taal"),
  "non-Dutch",
  NA
)

# Name particles (voorvoegsels) that are treated as a sign of a Dutch name.
voorvoegselsnl <- c("'t", "d'", "de", "de la", "den", "del", "der", "des", "in 't", "op de", "op den", "ten", "ter", "tes", "van", "van 't", "van de" , "van der", "van den") 

# vk$nl is 1 when the particle part of the name (lastname_df$np) contains any
# of the particles above and 0 otherwise. A missing particle counts as 0; the
# former row-by-row loop stopped with an error on NA.
# NOTE(review): particles are matched as substrings, so e.g. "von der" also
# counts because it contains "der" - confirm whether exact matching is wanted.
particle_pattern <- paste(voorvoegselsnl, collapse = "|")
has_dutch_particle <- str_detect(
  lastname_df$np[seq_len(nrow(vk))],
  particle_pattern
)
vk$nl <- as.numeric(has_dutch_particle %in% TRUE)

# Final coding: a name is non-Dutch (0) when no information was found for it
# (no_info == 1) or when it carries the "andere taal" tag
# (origin4 == "non-Dutch"), unless it has a Dutch particle (nl == 1).
# Everything else is coded Dutch (1).
# The second test used to read vk$no_info == "non_Dutch", which looked at the
# wrong column with a misspelt value and therefore never matched.
vk$dutch <- ifelse(
  (vk$no_info == 1 | vk$origin4 %in% "non-Dutch") & vk$nl != 1,
  0,
  1
)
names(vk)
vk %>% select(c("lastname_df.id", "lastname_df.lastname", "dutch"))
save(vk, file="vk20221006.RData") 
# Copy the coding back to the input data; relies on vk and names_df having
# the same row order.
names_df$dutch <- vk$dutch
save(names_df, file="names_df_v20221006.RData") 

Ik zou alles waar no_info op 1 staat of waar origin4 op “non-Dutch” staat coderen als buitenlands!

LS0tDQp0aXRsZTogIjMuIE9yaWdpbiINCmF1dGhvcjogImJ5OiBOaW5hIEJyYW50ZW4iDQpiaWJsaW9ncmFwaHk6IHJlZmVyZW5jZXMuYmliDQotLS0NCg0KDQoNCmBgYHtyLCBpbnN0YWxsIHJlbW90ZWx5LCBnbG9iYWxzZXR0aW5ncywgZWNobz1GQUxTRSwgd2FybmluZz1GQUxTRSwgcmVzdWx0cz0naGlkZScsIGV2YWw9RkFMU0V9DQppbnN0YWxsLnBhY2thZ2VzKCJyZW1vdGVzIikNCnJlbW90ZXM6Omluc3RhbGxfZ2l0aHViKCJybGVzdXIva2xpcHB5IikNCmBgYCANCg0KYGBge3IsIGdsb2JhbHNldHRpbmdzLCBlY2hvPUZBTFNFLCB3YXJuaW5nPUZBTFNFLCByZXN1bHRzPSdoaWRlJ30NCmxpYnJhcnkoa25pdHIpDQpsaWJyYXJ5KHJnbCkNCg0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQ0Kb3B0c19jaHVuayRzZXQodGlkeS5vcHRzPWxpc3Qod2lkdGguY3V0b2ZmPTEwMCksdGlkeT1UUlVFLCB3YXJuaW5nID0gRkFMU0UsIG1lc3NhZ2UgPSBGQUxTRSxjb21tZW50ID0gIiM+IiwgY2FjaGU9VFJVRSwgY2xhc3Muc291cmNlPWMoInRlc3QiKSwgY2xhc3Mub3V0cHV0PWMoInRlc3QyIikpDQpvcHRpb25zKHdpZHRoID0gMTAwKQ0KcmdsOjpzZXR1cEtuaXRyKCkNCg0KDQoNCmNvbG9yaXplIDwtIGZ1bmN0aW9uKHgsIGNvbG9yKSB7c3ByaW50ZigiPHNwYW4gc3R5bGU9J2NvbG9yOiAlczsnPiVzPC9zcGFuPiIsIGNvbG9yLCB4KSB9DQoNCmBgYA0KDQpgYGB7ciBrbGlwcHksIGVjaG89RkFMU0UsIGluY2x1ZGU9VFJVRX0NCmtsaXBweTo6a2xpcHB5KHBvc2l0aW9uID0gYygndG9wJywgJ3JpZ2h0JykpDQoja2xpcHB5OjprbGlwcHkoY29sb3IgPSAnZGFya3JlZCcpDQoja2xpcHB5OjprbGlwcHkodG9vbHRpcF9tZXNzYWdlID0gJ0NsaWNrIHRvIGNvcHknLCB0b29sdGlwX3N1Y2Nlc3MgPSAnRG9uZScpDQpgYGANCg0KIyBnZXR0aW5nIHN0YXJ0ZWQNCg0KYGBge3IsIGV2YWw9RkFMU0V9DQojc3RhcnQgd2l0aCBjbGVhbiB3b3Jrc3BhY2UgDQpybShsaXN0PWxzKCkpDQpsb2FkKCIuL2RhdGEvbmFtZXNfZGZfdjIwMjIxMDA1LlJEYXRhIikNCmBgYA0KDQoNCg0KIyBwYWNrYWdlcw0KDQpgYGB7cn0NCmxpYnJhcnkoZGF0YS50YWJsZSkgDQpsaWJyYXJ5KHRpZHl2ZXJzZSkgDQpyZXF1aXJlKHhtbDIpDQpyZXF1aXJlKHJ2ZXN0KQ0KcmVxdWlyZShkZXZ0b29scykNCnJlcXVpcmUoc2Nob2xhcikNCnJlcXVpcmUoc3RyaW5naSkNCg0KYGBgDQoNCmFuZGVyZSBzdHJhdGVnaWUuIGFsbGUgbm9iaWxpYXJ5IHBhcnRpY2xlcyBlciBhZi4NCmRhbiB6b25kZXIgZGV6ZSBkb29yIGRhdGFiYW5rLiANCmRhbiBpbmRpZW4gZ2VlbiBpbmZvLCB2aWEgcGFydGljbGVzIE5MLWlkZW50aXRlaXQgKG9mIHNwYWFucyBvZiBEdWl0cyBvZiBhcmFiaXNjaCkNCg0KYGBge3IsIGV2YWw9RkFMU0V9DQpsYXN0bmFtZV9kZiA8LSBuYW1lc19kZg0KbGFzdG5hbWVfZGYkbGFzdG5hbWUyIDwtIGFzLmNoYXJhY3RlcihzdHJfc3BsaXQobGFzdG5hbWVf
ZGYkbGFzdG5hbWUsIHBhdHRlcm49IiwiLCBuID0gMiwgc2ltcGxpZnkgPSBUUlVFKVssMV0pDQpsYXN0bmFtZV9kZiRucCA8LSBhcy5jaGFyYWN0ZXIoc3RyX3NwbGl0KGxhc3RuYW1lX2RmJGxhc3RuYW1lLCBwYXR0ZXJuPSIsIiwgbiA9IDIsIHNpbXBsaWZ5ID0gVFJVRSlbLDJdKQ0KYGBgDQoNCg0KYGBge3IsIGV2YWw9RkFMU0V9DQojIGNyZWF0aW5nIFVSTHM6IG9yaWdpbg0KZmxhbmFtZSA8LSBmdW5jdGlvbih4KXsNCiAgICBwYXN0ZSh0b3VwcGVyKHN1YnN0cmluZyh4LCAxLCAxKSksDQogICAgICAgICAgdG9sb3dlcihzdWJzdHJpbmcoeCwgMiwgbmNoYXIoeCkpKSwNCiAgICAgICAgICBzZXAgPSAiIikNCn0NCmxhc3RuYW1lX2RmJGxhc3RuYW1lMiA8LSBmbGFuYW1lKGxhc3RuYW1lX2RmJGxhc3RuYW1lMikNCg0KbGFzdG5hbWVfZGYkbmFtZV9vcmlnaW4gPC0gcGFzdGUwKCJodHRwczovL3d3dy5jYmdmYW1pbGllbmFtZW4ubmwvbmZiL2RldGFpbF9uYWFtLnBocD9nYmFfbmFhbT0iLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBsYXN0bmFtZV9kZiRsYXN0bmFtZTIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICImbmZkX25hYW09IiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbGFzdG5hbWVfZGYkbGFzdG5hbWUyLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiJmluZm89YW5hbHlzZStlbit2ZXJrbGFyaW5nJm9wZXJhdG9yPWVxJnRhYWw9IikNCmBgYA0KDQpodHRwczovL3d3dy5jYmdmYW1pbGllbmFtZW4ubmwvbmZiL2RldGFpbF9uYWFtLnBocD9nYmFfbmFhbT10b2xzbWEmZ2JhX25hYW09VG9sc21hJm5mZF9uYWFtPSZpbmZvPWFuYWx5c2UrZW4rdmVya2xhcmluZyZvcGVyYXRvcj1lcSZ0YWFsPQ0KDQojbGFzdG5hbWVfZGYkbmFtZV9vcmlnaW5bMTRdDQpodHRwczovL3d3dy5jYmdmYW1pbGllbmFtZW4ubmwvbmZiL2RldGFpbF9uYWFtLnBocD9nYmFfbmFhbT0NCiAgVG9sc21hDQombmZkX25hYW09DQogIFRvbHNtYQ0KJmluZm89YW5hbHlzZStlbit2ZXJrbGFyaW5nJm9wZXJhdG9yPWVxJnRhYWw9DQoNCiNodHRwczovL3d3dy5jYmdmYW1pbGllbmFtZW4ubmwvbmZiL2RldGFpbF9uYWFtLnBocD9nYmFfbGNuYWFtPWtyYWF5a2FtcCZnYmFfbmFhbT1LcmFheWthbXAmbmZkX25hYW09S3JhYWlqa2FtcCUyMCh5KSZvcGVyYXRvcj1lcSZ0YWFsPQ0KDQoNCmhpZXIgc2xhYW4gd2UgYWxsZXMgb3ANCmBgYHtyLCBldmFsPUZBTFNFfQ0KbmFtZV9vcmlnaW5sIDwtIGxpc3QoKQ0KdGFibGVfb3JpZ2lubCA8LSBsaXN0KCkNCnRpbWUgPC0gMC4xDQpgYGANCg0KIyBjcnVjaWNhbCBzY3JhcGUgbG9vcA0KIA0KYGBge3IsIGV2YWw9RkFMU0V9DQoNCmZvciAoaSBpbiAxOm5yb3cobGFzdG5hbWVfZGYpKSB7DQogIHByaW50KGkpDQogIFN5cy5zbGVlcCh0aW1lKQ0KICB0cnlDYXRjaCh7IA0KICAgIG5hbWVfb3JpZ2lubFtbaV1dICA8LSByZWFkX2h0bWwobGFzdG5hbWVf
ZGZbaSwgYygibmFtZV9vcmlnaW4iKV0pDQogICAgdGFibGVfb3JpZ2lubFtbaV1dIDwtIG5hbWVfb3JpZ2lubFtbaV1dICU+JSBodG1sX3RhYmxlKCkNCiAgfSwgDQogICAgd2FybmluZyA9IGZ1bmN0aW9uKHcpIHsNCiAgICAgICAgY2F0KCJXQVJOSU5HOiIsIGNvbmRpdGlvbk1lc3NhZ2UodyksICJcbiIpICNXQVJOSU5HIG1lc3NhZ2UNCiAgICB9LA0KICAgIGVycm9yPWZ1bmN0aW9uKGUpew0KICAgICAgZXJyIDwtIGNvbmRpdGlvbk1lc3NhZ2UoZSkNCiAgICAgIGNhdCgiRXJyb3I6IiwgY29uZGl0aW9uTWVzc2FnZShlKSwgIlxuIikgI0VSUk9SIG1lc3NhZ2UNCiAgICB9ICANCiAgKQ0KfQ0KYGBgDQoNCmVuIHZhbmFmIGhpZXIgaXMgaGV0IGVpZ2VubGlqayBhbGxlZW4gbWFhciBvcHNjaG9uZW4uIA0KYGBge3IsIGV2YWw9RkFMU0V9DQpvcmlnaW5fdHh0IDwtIGxpc3QoKQ0KZm9yIChpIGluIDE6bGVuZ3RoKG5hbWVfb3JpZ2lubCkpIHsNCiAgICBvcmlnaW5fdHh0IFtbaV1dIDwtIG5hbWVfb3JpZ2lubFtbaV1dICU+JSBodG1sX3RleHQoKSAlPiUgYXMuY2hhcmFjdGVyKCkNCn0NCg0KYGBgDQoNCmBgYHtyLCBldmFsPUZBTFNFfQ0KIyBHZXQgb3V0IHRoZSByZWxldmFudCBvcmlnaW4gaW5mb3JtYXRpb24gZnJvbSB0aGUgeG1sIGxpc3RzDQpvcmlnaW5fbG4gPC0gbGlzdCgpDQoNCmZvciAoaSBpbiAxOmxlbmd0aChuYW1lX29yaWdpbmwpKSB7DQogIG9yaWdpbl9sbltbaV1dIDwtIG5hbWVfb3JpZ2lubFtbaV1dICU+JSBodG1sX25vZGVzKCJkaXYiKSAlPiUgcnZlc3Q6Omh0bWxfdGV4dCgpDQogIG9yaWdpbl9sbltbaV1dIDwtIG9yaWdpbl9sbltbaV1dW1szXV0NCn0NCg0KIyBSZW1vdmUgbWVzcw0KZm9yIChpIGluIDE6bGVuZ3RoKG9yaWdpbl9sbikpIHsNCiAgb3JpZ2luX2xuW1tpXV0gPC0gZ3N1YigiXFx0IiwgIiAiLCBvcmlnaW5fbG5bW2ldXSkNCiAgb3JpZ2luX2xuW1tpXV0gPC0gZ3N1YigiXFxuIiwgIiAiLCBvcmlnaW5fbG5bW2ldXSkNCn0NCg0KIyBGbGF0dGVuIG5lc3RlZCBzdHJ1Y3R1cmUgb2YgdGhlIG9yaWdpbiBpbmZvcm1hdGlvbg0KI29yaWdpbl9sbiA8LSByYmluZChmbGF0dGVuKG9yaWdpbl9sbikpDQoNCmBgYA0KDQoNCmBgYHtyIGV4dHJhY3RpbmctdmVya2xhcmluZy1rZW5tZXJrZW4sIGV2YWw9RkFMU0V9DQoNCiMgRGV0YWNoaW5nIHRoZSBuYW1lcyBhbmQgb3JpZ2luIGluZm8gZm9yIGVhc2llciBkYXRhIGhhbmRsaW5nDQpvcmlnaW4gPC0gdW5saXN0KG9yaWdpbl9sbikNCg0KDQoNCm9yaWdpbiA8LSBzdHJfZXh0cmFjdF9hbGwob3JpZ2luLCAidmFyaWFudGVuKC4qPynCqSIpDQoNCiMgT3JpZ2luIGluZm9ybWF0aW9uIGlzIHVzdWFsbHkgbWVudGlvbmVkIGFmdGVyICJ2ZXJrbGFyaW5nIiBvciAia2VubWVya2VuIg0Kb3JpZ2luIDwtIHN0cl9yZW1vdmVfYWxsKG9yaWdpbiwgInZhcmlhbnRlbiIpDQpvcmlnaW4gPC0gc3RyX3JlbW92ZV9hbGwob3JpZ2luLCAiQ0JHIEJyb25uZW4iKQ0Kb3JpZ2lu
IDwtIHN0cl9yZW1vdmVfYWxsKG9yaWdpbiwgImNhdGFsb2d1cyIpDQpvcmlnaW4gPC0gc3RyX3JlbW92ZV9hbGwob3JpZ2luLCAiwqkiKQ0KDQoNCnZlcmtsYXJpbmcgPC0gc3RyX3JlbW92ZV9hbGwob3JpZ2luLCAia2VubWVya2VuOiguKj8pJCIpDQprZW5tZXJrZW4gPC0gc3RyX2V4dHJhY3RfYWxsKG9yaWdpbiwgImtlbm1lcmtlbjooLio/KSQiKQ0Ka2VubWVya2VuIDwtIHN0cl9yZW1vdmVfYWxsKGtlbm1lcmtlbiwgInNwZWNpZmlla2UgY29tcG9uZW50ZW46KC4qPykkIikNCnNjIDwtIHN0cl9leHRyYWN0X2FsbChvcmlnaW4sICJzcGVjaWZpZWtlIGNvbXBvbmVudGVuOiguKj8pJCIpICMgTm90IGRpcmVjdGx5IHJlbGV2YW50IHRvIHVzLCBidXQgZG9lcyBtZWFuIHRoYXQgdGhlIG5hbWUgaGFzIGEgd2VicGFnZQ0KDQoNCiMgTWFrZSBpbnRvIGEgbmVhdCBkYXRhZnJhbWUgd2l0aCB0aGUgbmFtZXMgYXR0YWNoZWQNCnZlcmtsYXJpbmcgPC0gdHJpbXdzKHZlcmtsYXJpbmcsIHdoaWNoID0gImJvdGgiKQ0Ka2VubWVya2VuIDwtIHRyaW13cyhrZW5tZXJrZW4sIHdoaWNoID0gImJvdGgiKQ0Kc2MgPC0gdHJpbXdzKHNjLCB3aGljaCA9ICJib3RoIikNCnZrIDwtIGRhdGEuZnJhbWUobGFzdG5hbWVfZGYkaWQsbGFzdG5hbWVfZGYkbGFzdG5hbWUsIHZlcmtsYXJpbmcsIGtlbm1lcmtlbiwgc2MpDQoNCg0KYGBgDQoNCg0KDQojIFNlcGFyYXRpbmcgbmFtZXMgd2l0aCBEdXRjaCAmIHVua25vd24gb3JpZ2luDQpOZXh0LCB3ZSBpZGVudGlmeSB0aG9zZSBuYW1lcyBmb3Igd2hpY2ggbm8gYWRkaXRpb25hbCBpbmZvcm1hdGlvbiB3YXMgZm91bmQuIFRoaXMgaXMgaW1wb3J0YW50IHRvIGRpc3Rpbmd1aXNoIER1dGNoIG5hbWVzIGZyb20gbmFtZXMgd2l0aCB1bmtub3duIG9yaWdpbnMuDQoNCi0gRHV0Y2ggbmFtZXM6IG5vIGxhYmVsIGluZGljYXRpbmcgdGhhdCB0aGUgbmFtZSBpcyBEdXRjaCwgYnV0IHNvbWUgb3RoZXIgaW5mb3JtYXRpb24gYXZhaWxhYmxlIG9uIG5hbWUgb3JpZ2luDQotIFVua25vd24gbmFtZXM6IHdlYiBwYWdlIGNhbm5vdCBiZSBmb3VuZCwgc28gb3JpZ2luIGluZm9ybWF0aW9uIGlzIGVtcHR5LiANCg0KYGBge3Igb3JpZ2luLXVua25vd24sIGV2YWw9RkFMU0V9DQoNCiMgSWRlbnRpZnkgbGFzdCBuYW1lcyB0aGF0IGNvdWxkIG5vdCBiZSBmb3VuZA0KdmsgPC0gdmsgJT4lDQogIG11dGF0ZSh2ZXJrbGFyaW5nID0gaWZlbHNlKHZlcmtsYXJpbmc9PSIiLCAwLCB2ZXJrbGFyaW5nKSwgDQogICAgICAgICBrZW5tZXJrZW4gPSBpZmVsc2Uoa2VubWVya2VuPT0iY2hhcmFjdGVyKDApIiwgMCwga2VubWVya2VuKSwNCiAgICAgICAgIHNjID0gaWZlbHNlKHNjPT0iY2hhcmFjdGVyKDApIiwgMCwgc2MpLA0KICAgICAgICAgbm9faW5mbyA9IG5jaGFyKHZlcmtsYXJpbmcpICsgbmNoYXIoa2VubWVya2VuKSArIG5jaGFyKHNjKSkNCg0KdmsgPC0gdmsgJT4lDQogIG11dGF0ZShub19pbmZvID0gaWZlbHNlKG5vX2luZm89PTMs
IDEsIDApLCANCiAgICAgICAgIHZlcmtsYXJpbmcgPSBpZmVsc2UodmVya2xhcmluZz09MCwgTkEsIHZlcmtsYXJpbmcpLA0KICAgICAgICAga2VubWVya2VuID0gaWZlbHNlKGtlbm1lcmtlbj09MCwgTkEsIGtlbm1lcmtlbikpDQojIElmIHRoZXJlIGlzIG5vIHRleHQgaW4gdmVya2xhcmluZyBvciBrZW5tZXJrZW4sIHRoZSBuYW1lIGNvdWxkIG5vdCBiZSBmb3VuZCBpbiB0aGUgZGF0YWJhc2VzLiANCmBgYA0KDQoNCiMgRXh0cmFjdGluZyBzcGVjaWZpYyBvcmlnaW4gaW5mb3JtYXRpb24NClRoZXJlIGFyZSB0aHJlZSBtYWluIHdheXMgdG8gZ2V0IGluZm9ybWF0aW9uIGFib3V0IHRoZSBvcmlnaW4gb2YgbGFzdCBuYW1lczoNCg0KMSkgVW5kZXIgImtlbm1lcmtlbiIsIGxhc3QgbmFtZXMgYXJlIGFzc2lnbmVkIGNsaWNrYWJsZSB0YWdzLiBUaGVzZSB0YWdzIGluY2x1ZGUgdW5zcGVjaWZpZWQgZm9yZWlnbiBuYW1lIHRhZ3MgKCJhbmRlcmUgdGFhbCIpLCBhcyB3ZWxsIGFzIHNwZWNpZmljIGZvcmVpZ24gb3JpZ2lucyBvZiB0aGUgbmFtZSAoIkZyYW5zZSBuYWFtIiwgIkluZGlzY2hlIG5hYW0iKS4NCi0+IG9yaWdpbjEgKyBvcmlnaW40DQoNCjIpIFNldmVyYWwgbmFtZXMgaGF2ZSBtb3JlIGV4dGVuc2l2ZWx5IHdyaXR0ZW4gb3V0IHN0b3JpZXMgYmVoaW5kIHRoZSBuYW1lLCB1bmRlciAidmVya2xhcmluZyIuIEEgbnVtYmVyIG9mIG5hbWVzIGNvbnRhaW4gZGV0YWlsZWQgKGVpdGhlciBjb3VudHJ5LWxldmVsIG9yIHJlZ2lvbmFsKSBvcmlnaW5zLCB1c3VhbGx5IGluIHRoZSBmb3JtIG9mICJEZSBuYWFtIFt4eXpdIGlzIGFma29tc3RpZyB1aXQgW2NvdW50cnldIi4gDQotPiBvcmlnaW4yIA0KDQozKSBTb21lIG5hbWVzIGhhdmUgb3JpZ2luIGluZm9ybWF0aW9uIHVuZGVyICJ2ZXJrbGFyaW5nIiBpbiB0aGUgZm9ybSBvZiB0aGUgbGluZ3Vpc3RpYyBvcmlnaW5zIG9mIHRoZSBuYW1lLiBUaGlzIGNhbiBiZSBjb3VudHJ5IHNwZWNpZmljIChlLmcuIENoaW5lc2UgbmFtZSksIGJ1dCBpdCBjYW4gYWxzbyBhcHBseSB0byBtdWx0aXBsZSBjb3VudHJpZXMgd2hlbiB0aGUgbGFuZ3VhZ2UgaXMgc3Bva2VuIGluIG1vcmUgdGhhbiAxIGNvdW50cmllcyAoZS5nLiBTcGFuaXNoIG5hbWUpLiANCi0+IG9yaWdpbjMgDQoNCkluIGhldCBzY3JpcHQgaGllcm9uZGVyIGdhYXQgZXJnZW5zIGlldHMgZm91dC4gRGFhcm9tIGhlYiBpayBldG5pY2l0ZWl0IHVpdGVpbmRlbGlqayBtZXQgZGUgaGFuZCB1aXRnZXJla2VuZC4NCg0KYGBge3IgY291bnRyaWVzLWV4dHJhY3QsIGV2YWw9RkFMU0V9DQoNCiMgU3RlcCAxOiBleHRyYWN0aW5nIG9yaWdpbiB0YWdzIGZyb20ga2VubWVya2VuDQp2ayA8LSB2ayAlPiUNCiAgbXV0YXRlKG9yaWdpbjEgPSBzdHJfZXh0cmFjdChrZW5tZXJrZW4sICJbOnVwcGVyOl0oWzpsb3dlcjpdezIsfSkgbmFhbSIpKQ0KDQojIE5vdGU6IHNvbWV0aW1lcyBtdWx0aXBsZSBvcmlnaW5zIGFyZSBtZW50aW9uZWQuIEN1cnJlbnRs
eSwgSSBvbmx5IGV4dHJhY3QgdGhlIGZpcnN0IG9uZS4gT3RoZXJ3aXNlLCB3ZSBzaG91bGQgdXNlIHN0cl9leHRyYWN0X2FsbC4gDQoNCg0KDQojIFN0ZXAgMjogZXh0cmFjdGluZyBvcmlnaW4gaW5mbyBmcm9tIHZlcmtsYXJpbmcgDQp2ayA8LSB2ayAlPiUNCiAgbXV0YXRlKG9yaWdpbjIgPSBpZmVsc2UoYXMubnVtZXJpYyhzdHJfZGV0ZWN0KHZlcmtsYXJpbmcsICJhZmtvbXN0aWcgdWl0IikpID09IDEsIA0KICAgICAgICAgc3RyX3JlbW92ZSh2ZXJrbGFyaW5nLCAiLiphZmtvbXN0aWcgdWl0IiksIE5BKSkNCg0KDQojIFN0ZXAgMzogZXh0cmFjdGluZyBhZGRpdGlvbmFsIG9yaWdpbiBpbmZvIGZyb20gdmVya2xhcmluZw0KdmsgPC0gdmsgJT4lDQogIG11dGF0ZShvcmlnaW4zID0gc3RyX2V4dHJhY3QodmVya2xhcmluZywgIls6dXBwZXI6XShbOmxvd2VyOl17Mix9KSAoYWNodGVyKT8oZmFtaWxpZSk/KGJlcm9lcHMpP25hYW0iKSkNCg0KDQoNCiMgRmluYWxseSwgd2UgY2xlYW4gdXAgdGhlIG9yaWdpbiBpbmZvcm1hdGlvbiBleHRyYWN0ZWQgYWJvdmUNCg0KIyBPcmlnaW4xOiBhbHJlYWR5IG5lYXQNCnZrJG9yaWdpbjEgPC0gc3RyX3JlbW92ZSh2ayRvcmlnaW4xLCAiSm9vZHNlIG5hYW0iKSAjIGNhbiBiZSBEdXRjaCAmIG5vbi1EdXRjaA0KDQojIE9yaWdpbjI6IG1lc3N5DQp2ayRvcmlnaW4yIDwtIHN0cl9yZW1vdmUodmskb3JpZ2luMiwgIlxcLi4qIikgIyByZW1vdmUgZXh0cmEgaW5mbyBpbiB0aGUgZm9sbG93aW5nIHNlbnRlbmNlIA0Kdmskb3JpZ2luMiA8LSBzdHJfcmVtb3ZlKHZrJG9yaWdpbjIsICJcXDsuKiIpICMgcmVtb3ZlIGV4dHJhIGluZm8gaW4gdGhlIGZvbGxvd2luZyBzZW50ZW5jZSANCnZrJG9yaWdpbjIgPC0gc3RyX3JlbW92ZSh2ayRvcmlnaW4yLCAiXFwoLioiKSAjIHJlbW92ZSBleHRyYSBpbmZvIGluIHRoZSBmb2xsb3dpbmcgc2VudGVuY2UgDQoNCg0KdmskZHBnIDwtIGFzLm51bWVyaWMoc3RyX2RldGVjdCh2ayRvcmlnaW4yLCAiKGRvcnApfChwbGFhdHMpfChnZW1lZW50ZSl8KGdyYWFmc2NoYXApfChzdGFkKXwoZGVlbCl8KEZyaWVzbGFuZCkiKSkgIyBvcmlnaW4gaW5mbyB0b28gcmVnaW9uYWwgDQp2ayA8LSB2ayAlPiUgbXV0YXRlKG9yaWdpbjIgPSBpZmVsc2UoKGRwZz09MSksIE5BLCBvcmlnaW4yKSkgIyByZW1vdmluZyByZWdpb25hbCBvcmlnaW4gaW5mbw0KdmsgPC0gc3Vic2V0KHZrLCBzZWxlY3QgPSAtZHBnKSAjIHJlbW92aW5nIGludGVybWVkaWF0ZSB2YXJpYWJsZQ0KDQojIFNvbWV0aW1lcywgdGhlcmUgd2VyZSBtdWx0aXBsZSBjb3VudHJpZXMgbWVudGlvbmVkLiBUYWtlIG9ubHkgdGhlIGZpcnN0Og0Kdmskb3JpZ2luMiA8LSBzdHJfcmVtb3ZlKHZrJG9yaWdpbjIsICJcXCwuKiIpICMgT25seSBmaXJzdA0Kdmskb3JpZ2luMiA8LSBzdHJfcmVtb3ZlKHZrJG9yaWdpbjIsICJcXHMoZW4pLioiKSAjIE9ubHkgZmlyc3QgDQp2ayRvcmlnaW4yIDwtIHN0cl9yZW1vdmUodmskb3JpZ2lu
MiwgIlxccyhvZikuKiIpICMgT25seSBmaXJzdCANCg0KDQojIE9yaWdpbjM6IHByZXR0eSBuZWF0DQp2ayRvcmlnaW4zIDwtIHN0cl9yZW1vdmUodmskb3JpZ2luMywgIkQoaSk/ZSh6ZSk/IChmYW1pbGllKT8oYWNodGVyKT8oYmVyb2Vwcyk/bmFhbSIpICMgc2xpcHBlZCB0aHJvdWdoIHRoZSByZWdleA0Kdmskb3JpZ2luMyA8LSBzdHJfcmVtb3ZlKHZrJG9yaWdpbjMsICJFZW4gKGZhbWlsaWUpPyhhY2h0ZXIpPyhiZXJvZXBzKT9uYWFtIikgIyBzbGlwcGVkIHRocm91Z2ggdGhlIHJlZ2V4DQp2ayRvcmlnaW4zIDwtIHN0cl9yZW1vdmUodmskb3JpZ2luMywgIlppam4gKGZhbWlsaWUpPyhhY2h0ZXIpPyhiZXJvZXBzKT9uYWFtIikgIyBzbGlwcGVkIHRocm91Z2ggdGhlIHJlZ2V4DQp2ayRvcmlnaW4zIDwtIHN0cl9yZW1vdmUodmskb3JpZ2luMywgIkFscyAoZmFtaWxpZSk/KGFjaHRlcik/KGJlcm9lcHMpP25hYW0iKSAjIHNsaXBwZWQgdGhyb3VnaCB0aGUgcmVnZXgNCnZrJG9yaWdpbjMgPC0gc3RyX3JlbW92ZSh2ayRvcmlnaW4zLCAiSm9vZHNlIChmYW1pbGllKT8oYWNodGVyKT9uYWFtIikNCnZrJG9yaWdpbjMgPC0gc3RyX3JlbW92ZSh2ayRvcmlnaW4zLCAiQmlqYmVsc2UgKGZhbWlsaWUpPyhhY2h0ZXIpP25hYW0iKQ0KDQoNCiMgU2V0dGluZyBlbXB0eSBvcmlnaW4gdmFyaWFibGVzIHRvIE5BIChEdXRjaCBvciB1bmZvdW5kIGZvcmVpZ24pDQp2ayA8LSB2ayAlPiUNCiAgbXV0YXRlKG9yaWdpbjEgPSBhcy5jaGFyYWN0ZXIoaWZlbHNlKG9yaWdpbjE9PSIifG9yaWdpbjE9PSJjaGFyYWN0ZXIoMCkiLCBOQSwgb3JpZ2luMSkpLA0KICAgICAgICAgb3JpZ2luMiA9IGFzLmNoYXJhY3RlcihpZmVsc2Uob3JpZ2luMj09IiJ8b3JpZ2luMj09ImNoYXJhY3RlcigwKSIsIE5BLCBvcmlnaW4yKSksDQogICAgICAgICBvcmlnaW4zID0gYXMuY2hhcmFjdGVyKGlmZWxzZShvcmlnaW4zPT0iInxvcmlnaW4zPT0iY2hhcmFjdGVyKDApIiwgTkEsIG9yaWdpbjMpKSkNCg0KDQoNCiMgRmluYWxseSwgdGhlIHRhZyAiYW5kZXJlIHRhYWwiIHdhcyB1c2VkIHRvIGRpc3Rpbmd1aXNoIGZvcmVpZ24gbmFtZXMgb2YgdW5rbm93biBvcmlnaW4gZnJvbSBrbm93biBEdXRjaCBuYW1lcy4gDQp2ayA8LSB2ayAlPiUNCiAgbXV0YXRlKG9yaWdpbjQgPSBpZmVsc2UoKGFzLm51bWVyaWMoc3RyX2RldGVjdChrZW5tZXJrZW4sICJhbmRlcmUgdGFhbCIpKT09MSksICJub24tRHV0Y2giLCBOQSkpDQoNCnZvb3J2b2Vnc2Vsc25sIDwtIGMoIid0IiwgImQnIiwgImRlIiwgImRlIGxhIiwgImRlbiIsICJkZWwiLCAiZGVyIiwgImRlcyIsICJpbiAndCIsICJvcCBkZSIsICJvcCBkZW4iLCAidGVuIiwgInRlciIsICJ0ZXMiLCAidmFuIiwgInZhbiAndCIsICJ2YW4gZGUiICwgInZhbiBkZXIiLCAidmFuIGRlbiIpIA0KDQp2ayRubCA8LSAwDQoNCmZvciAoaSBpbiAxOiBsZW5ndGgodmskbGFzdG5hbWVfZGYubGFzdG5hbWUpKSB7DQogIGlmIChzdW0oc3RyX2RldGVj
dChsYXN0bmFtZV9kZiRucFtpXSwgdm9vcnZvZWdzZWxzbmwpKT4wKSB7DQogICAgdmskbmxbaV0gPC0gMQ0KICB9DQp9DQoNCnZrJGR1dGNoIDwtIGlmZWxzZSgoKHZrJG5vX2luZm89PTEgfCB2ayRub19pbmZvPT0ibm9uX0R1dGNoIikgJiAhdmskbmw9PTEpLCAwLCAxKQ0KbmFtZXModmspDQp2ayAlPiUgc2VsZWN0KGMoImxhc3RuYW1lX2RmLmlkIiwgImxhc3RuYW1lX2RmLmxhc3RuYW1lIiwgImR1dGNoIikpDQoNCmBgYA0KDQpgYGB7ciwgZXZhbD1GQUxTRX0NCnNhdmUodmssIGZpbGU9InZrMjAyMjEwMDYuUkRhdGEiKSANCmBgYA0KDQpgYGB7ciwgZXZhbD1GQUxTRX0NCm5hbWVzX2RmJGR1dGNoIDwtIHZrJGR1dGNoDQpgYGANCg0KDQpgYGB7ciwgZXZhbD1GQUxTRX0NCnNhdmUobmFtZXNfZGYsIGZpbGU9Im5hbWVzX2RmX3YyMDIyMTAwNi5SRGF0YSIpIA0KYGBgDQoNCg0KSWsgem91IGFsbGVzIHdhYXIgYG5vX2luZm9gIG9wIDEgc3RhYXQgb2Ygd2FhciBgb3JpZ2luNGAgb3AgIm5vbi1EdXRjaCIgc3RhYXQgY29kZXJlbiBhbHMgYnVpdGVubGFuZHMhIA0KDQoNCg==