From b72eb33de2a5218828789f0bd6c7e2deef59bc2c Mon Sep 17 00:00:00 2001 From: Hector Correa Date: Wed, 30 Nov 2022 18:09:58 -0500 Subject: [PATCH 1/2] Added support for matchFields --- README.md | 24 +++++++++++--------- cmd/marcli/json.go | 2 +- cmd/marcli/main.go | 40 +++++++++++++++++++++++---------- cmd/marcli/mrc.go | 2 +- cmd/marcli/mrk.go | 2 +- cmd/marcli/processFileParams.go | 17 +++++++------- cmd/marcli/solr.go | 2 +- cmd/marcli/xml.go | 2 +- pkg/marc/record.go | 27 ++++++++++++++++++++-- 9 files changed, 80 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 5719cd0..ed54151 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ On the Mac or Linux the easiest way to install `marcli` is via Homebrew: ``` brew install marcli +marcli -file yourfile.mrc ``` Or by downloading the binary for your OS from the [releases tab](https://github.com/hectorcorrea/marcli/releases) and marking the downloaded file as an executable: @@ -16,40 +17,41 @@ Or by downloading the binary for your OS from the [releases tab](https://github. ``` curl -LO https://github.com/hectorcorrea/marcli/releases/latest/download/marcli chmod u+x marcli -``` - -Once installed you can just run it via: - -``` ./marcli -file yourfile.mrc ``` +Notice that if you install `marcli` via Homebrew the executable will be on your path and you can run it just by typing `marcli` from any folder, whereas if you install it via cURL you need to indicate the path `./marcli`. ## Sample of usage -Output MARC data to the console in a line delimited format: +Output MARC data to the console in a line delimited format (`marcli` automatically detects whether the file provided is in MARC XML or MARC binary): + ``` ./marcli -file data/test_1a.mrc +./marcli -file data/test_10.xml ``` -If the file extension is `.xml` the file is expected to be a MARC XML file, otherwise MARC binary is assumed. 
- Extract MARC records on file that contain the string "wildlife" ``` ./marcli -file data/test_10.mrc -match wildlife ``` -Extracts MARC records on file that contain the string "wildlife" but outputs only fields "LDR,001,040,245a,650" for each record. +Extracts MARC records on file that contain the string "wildlife" but outputs only fields "LDR,001,040,245a,650" for each record (LDR means the leader of the MARC record). ``` ./marcli -file data/test_10.mrc -match wildlife -fields LDR,010,040,245a,650 ``` -LDR means the leader of the MARC record. +The `-matchFields` parameter to limit the fields where the match will be made: + +``` +./marcli -file=data/test_10.mrc -match=web -matchFields=530 +```` A letter (or letters) after the field tag indicates to output only those subfields. For example "907xz" means output subfield "x" and "z" in field "907". -You can also use the `exclude` option to indicate fields to exclude from the output (notice that only full subfields are supported here, e.g. 970 is accepted but not 970a) + +You can also use the `exclude` option to indicate fields to exclude from the output (notice that only full fields are supported here, e.g. 
970 is accepted but not 970a) You can also filter based on the presence of certain fields in the MARC record (regardless of their value), for example the following will only output records that have a MARC 110 field: diff --git a/cmd/marcli/json.go b/cmd/marcli/json.go index 9e002c0..ec02a60 100644 --- a/cmd/marcli/json.go +++ b/cmd/marcli/json.go @@ -42,7 +42,7 @@ func toJson(params ProcessFileParams) error { if i++; i < start { continue } - if r.Contains(params.searchValue) && r.HasFields(params.hasFields) { + if r.Contains(params.searchValue, params.searchFields) && r.HasFields(params.hasFields) { if out > 0 { fmt.Printf(",\r\n") } else { diff --git a/cmd/marcli/main.go b/cmd/marcli/main.go index a94275a..04bcc0e 100644 --- a/cmd/marcli/main.go +++ b/cmd/marcli/main.go @@ -9,13 +9,14 @@ import ( "github.com/hectorcorrea/marcli/pkg/marc" ) -var fileName, search, fields, exclude, format, hasFields string +var fileName, search, searchFields, fields, exclude, format, hasFields string var start, count int var debug bool func init() { flag.StringVar(&fileName, "file", "", "MARC file to process. Required.") flag.StringVar(&search, "match", "", "String that must be present in the content of the record, case insensitive.") + flag.StringVar(&searchFields, "matchFields", "", "Comma delimited list of fields to search, used when match parameter is indicated, defaults to all fields.") flag.StringVar(&fields, "fields", "", "Comma delimited list of fields to output.") flag.StringVar(&exclude, "exclude", "", "Comma delimited list of fields to exclude from the output.") flag.StringVar(&format, "format", "mrk", "Output format. 
Accepted values: mrk, mrc, xml, json, or solr.") @@ -33,14 +34,15 @@ func main() { } params := ProcessFileParams{ - filename: fileName, - searchValue: strings.ToLower(search), - filters: marc.NewFieldFilters(fields), - exclude: marc.NewFieldFilters(exclude), - start: start, - count: count, - hasFields: marc.NewFieldFilters(hasFields), - debug: debug, + filename: fileName, + searchValue: strings.ToLower(search), + searchFields: searchFieldsFromString(searchFields), + filters: marc.NewFieldFilters(fields), + exclude: marc.NewFieldFilters(exclude), + start: start, + count: count, + hasFields: marc.NewFieldFilters(hasFields), + debug: debug, } if len(params.filters.Fields) > 0 && len(params.exclude.Fields) > 0 { @@ -73,12 +75,26 @@ func showSyntax() { fmt.Printf("\r\n") fmt.Printf(` NOTES: - The match parameter is used to filter records based on the content of the -values in the record. The hasFields parameter is used to filter records based -on the presence of certain fields on the record (regardless of their value). + The match parameter is used to filter records based on their content. +By default marcli searches in all the fields for each record, you can use +the matchFields parameter to limit the search to only certain fields (subfields +are not supported in matchFields, i.e. 245 is OK, 245a is not) + + The hasFields parameter is used to filter records based on the presence +of certain fields on the record (regardless of their value). You can only use the fields or exclude parameter, but not both. 
`) fmt.Printf("\r\n") fmt.Printf("\r\n") } + +func searchFieldsFromString(searchFieldsString string) []string { // splits a comma delimited list of field tags; always returns a non-nil slice, empty when no usable entry is left + values := []string{} + for _, value := range strings.Split(searchFieldsString, ",") { + if value = strings.TrimSpace(value); value != "" { // trim every entry and skip blank ones, the check must be on the entry and not on the whole input + values = append(values, value) + } + } + return values +} diff --git a/cmd/marcli/mrc.go b/cmd/marcli/mrc.go index 8979b2a..be39611 100644 --- a/cmd/marcli/mrc.go +++ b/cmd/marcli/mrc.go @@ -39,7 +39,7 @@ func toMrc(params ProcessFileParams) error { continue } - if r.Contains(params.searchValue) && r.HasFields(params.hasFields) { + if r.Contains(params.searchValue, params.searchFields) && r.HasFields(params.hasFields) { fmt.Printf("%s", r.Raw()) if out++; out == count { break diff --git a/cmd/marcli/mrk.go b/cmd/marcli/mrk.go index 196d384..71aaaeb 100644 --- a/cmd/marcli/mrk.go +++ b/cmd/marcli/mrk.go @@ -44,7 +44,7 @@ func toMrk(params ProcessFileParams) error { continue } - if r.Contains(params.searchValue) && r.HasFields(params.hasFields) { + if r.Contains(params.searchValue, params.searchFields) && r.HasFields(params.hasFields) { str := "" if params.filters.IncludeLeader() { str += fmt.Sprintf("%s\r\n", r.Leader) diff --git a/cmd/marcli/processFileParams.go b/cmd/marcli/processFileParams.go index addabf9..47ef3f8 100644 --- a/cmd/marcli/processFileParams.go +++ b/cmd/marcli/processFileParams.go @@ -5,14 +5,15 @@ import ( ) type ProcessFileParams struct { - filename string - searchValue string - filters marc.FieldFilters - exclude marc.FieldFilters - start int - count int - hasFields marc.FieldFilters - debug bool + filename string + searchValue string + searchFields []string + filters marc.FieldFilters + exclude marc.FieldFilters + start int + count int + hasFields marc.FieldFilters + debug bool } func (p ProcessFileParams) HasFilters() bool { diff --git a/cmd/marcli/solr.go b/cmd/marcli/solr.go index a355d89..bb6a4b4 100644 --- a/cmd/marcli/solr.go +++ b/cmd/marcli/solr.go @@ -93,7 +93,7 @@ func toSolr(params 
ProcessFileParams) error { if i++; i < start { continue } - if r.Contains(params.searchValue) && r.HasFields(params.hasFields) { + if r.Contains(params.searchValue, params.searchFields) && r.HasFields(params.hasFields) { if out > 0 { fmt.Printf(",\r\n") } else { diff --git a/cmd/marcli/xml.go b/cmd/marcli/xml.go index 8fb7e75..b746c58 100644 --- a/cmd/marcli/xml.go +++ b/cmd/marcli/xml.go @@ -70,7 +70,7 @@ func toXML(params ProcessFileParams) error { continue } - if r.Contains(params.searchValue) && r.HasFields(params.hasFields) { + if r.Contains(params.searchValue, params.searchFields) && r.HasFields(params.hasFields) { str, err := recordToXML(r, params) if err != nil { if params.debug { diff --git a/pkg/marc/record.go b/pkg/marc/record.go index 4c92251..08e8c7d 100644 --- a/pkg/marc/record.go +++ b/pkg/marc/record.go @@ -14,11 +14,25 @@ type Record struct { } // Contains returns true if Record contains the value passed. -func (r Record) Contains(searchValue string) bool { +// If searchFieldsList is empty it searches in all the fields of the record, +// otherwise the search is limited to the fields whose tag is in searchFieldsList. 
+func (r Record) Contains(searchValue string, searchFieldsList []string) bool { if searchValue == "" { return true } - for _, field := range r.Fields { + + var searchFields []Field + if len(searchFieldsList) == 0 { // no restriction requested: every field of the record is searched + searchFields = r.Fields + } else { + for _, field := range r.Fields { + if r.arrayContains(searchFieldsList, field.Tag) { // keep only the fields whose tag is exactly one of the requested tags + searchFields = append(searchFields, field) + } + } + } + + for _, field := range searchFields { if field.Contains(searchValue) { return true } @@ -175,3 +189,12 @@ func (r Record) GetValues(tag string, subfield string) []string { } return values } + +func (r Record) arrayContains(array []string, value string) bool { // reports whether array has an element equal (==) to value; the receiver r is not used + for _, element := range array { + if element == value { + return true + } + } + return false // also the result for an empty or nil array +} From 63dfed1688d71fe4a2271153ab8cb02d33df01ec Mon Sep 17 00:00:00 2001 From: Hector Correa Date: Wed, 30 Nov 2022 18:21:33 -0500 Subject: [PATCH 2/2] Minor tweaks --- README.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index ed54151..6851619 100644 --- a/README.md +++ b/README.md @@ -36,21 +36,18 @@ Extract MARC records on file that contain the string "wildlife" ./marcli -file data/test_10.mrc -match wildlife ``` -Extracts MARC records on file that contain the string "wildlife" but outputs only fields "LDR,001,040,245a,650" for each record (LDR means the leader of the MARC record). +Extracts MARC records on file that contain the string "wildlife" but outputs only fields "LDR,001,040,245a,650" for each record, LDR means the leader of the MARC record. In the `-fields` parameter a letter (or letters) after the field tag indicates to output only those subfields. For example "907xz" means output subfield "x" and "z" in field "907". 
``` ./marcli -file data/test_10.mrc -match wildlife -fields LDR,010,040,245a,650 ``` -The `-matchFields` parameter to limit the fields where the match will be made: +The `-matchFields` parameter can be used to limit the fields where the match will be made: ``` ./marcli -file=data/test_10.mrc -match=web -matchFields=530 ```` -A letter (or letters) after the field tag indicates to output only those subfields. For example "907xz" means output subfield "x" and "z" in field "907". - - You can also use the `exclude` option to indicate fields to exclude from the output (notice that only full fields are supported here, e.g. 970 is accepted but not 970a) You can also filter based on the presence of certain fields in the MARC record (regardless of their value), for example the following will only output records that have a MARC 110 field: