diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..d1dec1f --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,14 @@ +Package: rIP +Type: Package +Title: Passes an array of IP addresses to iphub.info and returns a dataframe with details of IP +Version: 0.1.0 +Author: Ryan Kennedy +Maintainer: Ryan Kennedy +Description: Takes as its input an array of IPs and the user's X-Key, passes these to iphub.info, and returns a dataframe with the ip (used for merging), country code, country name, asn, isp, block, and hostname. + Especially important in this is the variable "block", which gives a score indicating whether the IP address is likely from a server farm and should be excluded from the data. It is coded 0 if the IP is residential/unclassified (i.e. safe IP), 1 if the IP is a non-residential IP (hosting provider, proxy, etc. -- should likely be excluded), and 2 for non-residential and residential IPs (more stringent, may flag innocent respondents). + The recommendation from iphub.info is to block or exclude those who score block = 1. 
+Imports:
+    httr
+License: CC0
+Encoding: UTF-8
+LazyData: true
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..40ea8e2
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1 @@
+exportPattern("^[[:alpha:]]+")
diff --git a/R/rIP function.R b/R/rIP function.R
new file mode 100644
index 0000000..a58df7b
--- /dev/null
+++ b/R/rIP function.R
@@ -0,0 +1,61 @@
+#' Look up IP addresses on iphub.info
+#'
+#' Sends one request per element of `ips` to the iphub.info v2 API, passing
+#' `key` in the `X-Key` header, and combines the parsed responses into a
+#' single data frame with one row per address.
+#'
+#' @param ips Vector of IP addresses. Each element is pasted onto the API URL.
+#' @param key The user's iphub.info API key, sent as the `X-Key` header.
+#'
+#' @return A data frame with one row per element of `ips` and one character
+#'   column per field found in the API responses; a field absent from a
+#'   response is `NA` in that row. A zero-length `ips` yields an empty data
+#'   frame and no request is made.
+#'
+#' @export
+getIPinfo <- function(ips, key) {
+  # NOTE(review): the key is sent over plain HTTP; prefer https if the
+  # endpoint supports it.
+  url <- "http://v2.api.iphub.info/ip/"
+  n <- length(ips)
+  if (n == 0L) {
+    return(data.frame())
+  }
+
+  pb <- utils::txtProgressBar(min = 0, max = n, style = 3)
+  # Close the progress bar even if a request errors part-way through.
+  on.exit(close(pb), add = TRUE)
+
+  records <- vector("list", n)
+  for (i in seq_len(n)) {
+    response <- httr::GET(
+      paste0(url, ips[i]),
+      httr::add_headers(`X-Key` = key)
+    )
+    if (httr::http_error(response)) {
+      warning(
+        "iphub.info request for ", ips[i], " returned HTTP status ",
+        httr::status_code(response),
+        call. = FALSE
+      )
+    }
+    info <- unlist(httr::content(response))
+    record <- as.character(info)
+    names(record) <- names(info)
+    records[[i]] <- record
+    utils::setTxtProgressBar(pb, i)
+  }
+
+  # Align rows on the union of field names so a response that lacks a field
+  # gets NA there instead of shifting or recycling the remaining values.
+  fields <- unique(unlist(lapply(records, names)))
+  cells <- unlist(lapply(records, function(record) unname(record[fields])))
+  ipDF <- matrix(
+    as.character(cells),
+    nrow = n,
+    ncol = length(fields),
+    byrow = TRUE,
+    dimnames = list(NULL, fields)
+  )
+  data.frame(ipDF, stringsAsFactors = FALSE)
+}
diff --git a/README.md b/README.md
index 8ecff27..0a01040 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,9 @@
-# rIP
-This is an R code project for detecting likely responsese from server farms on MTurk surveys. It will eventually be built into a package, but for now is an easy function for use. Credit to @tylerburleigh for pointing out the utility of iphub.info. His method for incorporating this information into Qualtrics surveys can be found here: https://twitter.com/tylerburleigh/status/1042528912511848448?s=19.
+This is an R code project for detecting likely responses from server farms on MTurk surveys.
+
+Takes as its input an array of IPs and the user's X-Key, passes these to iphub.info, and returns a dataframe with the ip (used for merging), country code, country name, asn, isp, block, and hostname.
+
+Especially important in this is the variable "block", which gives a score indicating whether the IP address is likely from a server farm and should be excluded from the data. It is coded 0 if the IP is residential/unclassified (i.e. 
safe IP), 1 if the IP is a non-residential IP (hosting provider, proxy, etc. -- should likely be excluded), and 2 for non-residential and residential IPs (more stringent, may flag innocent respondents). + +The recommendation from iphub.info is to block or exclude those who score block = 1. + +Credit to @tylerburleigh for pointing out the utility of iphub.info. His method for incorporating this information into Qualtrics surveys can be found here: https://twitter.com/tylerburleigh/status/1042528912511848448?s=19. \ No newline at end of file