Skip to content

Commit

Permalink
Merge pull request #11 from hectorcorrea/matchFields
Browse files Browse the repository at this point in the history
Match fields
  • Loading branch information
hectorcorrea authored Nov 30, 2022
2 parents 59f1aa9 + 63dfed1 commit ae5f33b
Show file tree
Hide file tree
Showing 9 changed files with 78 additions and 39 deletions.
23 changes: 11 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,47 +9,46 @@ On the Mac or Linux the easiest way to install `marcli` is via Homebrew:

```
brew install marcli
marcli -file yourfile.mrc
```

Or by downloading the binary for your OS from the [releases tab](https://github.com/hectorcorrea/marcli/releases) and marking the downloaded file as an executable:

```
curl -LO https://github.com/hectorcorrea/marcli/releases/latest/download/marcli
chmod u+x marcli
```

Once installed you can just run it via:

```
./marcli -file yourfile.mrc
```

Notice that if you install `marcli` via Homebrew the executable will be on your path and you can run it just by typing `marcli` from any folder, whereas if you install it via cURL you need to indicate the path `./marcli`.

## Sample of usage

Output MARC data to the console in a line delimited format:
Output MARC data to the console in a line delimited format (`marcli` automatically detects whether the file provided is in MARC XML or MARC binary):

```
./marcli -file data/test_1a.mrc
./marcli -file data/test_10.xml
```

If the file extension is `.xml` the file is expected to be a MARC XML file, otherwise MARC binary is assumed.

Extract MARC records on file that contain the string "wildlife"
```
./marcli -file data/test_10.mrc -match wildlife
```

Extracts MARC records on file that contain the string "wildlife" but outputs only fields "LDR,001,040,245a,650" for each record.
Extracts MARC records on file that contain the string "wildlife" but outputs only fields "LDR,001,040,245a,650" for each record. LDR means the leader of the MARC record. In the `-fields` parameter, a letter (or letters) after the field tag indicates that only those subfields should be output. For example, "907xz" means output subfields "x" and "z" in field "907".

```
./marcli -file data/test_10.mrc -match wildlife -fields LDR,010,040,245a,650
```

LDR means the leader of the MARC record.
The `-matchFields` parameter can be used to limit the fields where the match will be made:

A letter (or letters) after the field tag indicates to output only those subfields. For example "907xz" means output subfield "x" and "z" in field "907".
```
./marcli -file=data/test_10.mrc -match=web -matchFields=530
```
You can also use the `exclude` option to indicate fields to exclude from the output (notice that only full subfields are supported here, e.g. 970 is accepted but not 970a)
You can also use the `exclude` option to indicate fields to exclude from the output (notice that only full fields are supported here, e.g. 970 is accepted but not 970a)
You can also filter based on the presence of certain fields in the MARC record (regardless of their value), for example the following will only output records that have a MARC 110 field:
Expand Down
2 changes: 1 addition & 1 deletion cmd/marcli/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func toJson(params ProcessFileParams) error {
if i++; i < start {
continue
}
if r.Contains(params.searchValue) && r.HasFields(params.hasFields) {
if r.Contains(params.searchValue, params.searchFields) && r.HasFields(params.hasFields) {
if out > 0 {
fmt.Printf(",\r\n")
} else {
Expand Down
40 changes: 28 additions & 12 deletions cmd/marcli/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@ import (
"github.com/hectorcorrea/marcli/pkg/marc"
)

var fileName, search, fields, exclude, format, hasFields string
var fileName, search, searchFields, fields, exclude, format, hasFields string
var start, count int
var debug bool

func init() {
flag.StringVar(&fileName, "file", "", "MARC file to process. Required.")
flag.StringVar(&search, "match", "", "String that must be present in the content of the record, case insensitive.")
flag.StringVar(&searchFields, "matchFields", "", "Comma delimited list of fields to search, used when match parameter is indicated, defaults to all fields.")
flag.StringVar(&fields, "fields", "", "Comma delimited list of fields to output.")
flag.StringVar(&exclude, "exclude", "", "Comma delimited list of fields to exclude from the output.")
flag.StringVar(&format, "format", "mrk", "Output format. Accepted values: mrk, mrc, xml, json, or solr.")
Expand All @@ -33,14 +34,15 @@ func main() {
}

params := ProcessFileParams{
filename: fileName,
searchValue: strings.ToLower(search),
filters: marc.NewFieldFilters(fields),
exclude: marc.NewFieldFilters(exclude),
start: start,
count: count,
hasFields: marc.NewFieldFilters(hasFields),
debug: debug,
filename: fileName,
searchValue: strings.ToLower(search),
searchFields: searchFieldsFromString(searchFields),
filters: marc.NewFieldFilters(fields),
exclude: marc.NewFieldFilters(exclude),
start: start,
count: count,
hasFields: marc.NewFieldFilters(hasFields),
debug: debug,
}

if len(params.filters.Fields) > 0 && len(params.exclude.Fields) > 0 {
Expand Down Expand Up @@ -73,12 +75,26 @@ func showSyntax() {
fmt.Printf("\r\n")
fmt.Printf(`
NOTES:
The match parameter is used to filter records based on the content of the
values in the record. The hasFields parameter is used to filter records based
on the presence of certain fields on the record (regardless of their value).
The match parameter is used to filter records based on their content.
By default marcli searches in all the fields for each record, you can use
the matchFields parameter to limit the search to only certain fields (subfields
are not supported in matchFields, i.e. 245 is OK, 245a is not)
The hasFields parameter is used to filter records based on the presence
of certain fields on the record (regardless of their value).
You can only use the fields or exclude parameter, but not both.
`)
fmt.Printf("\r\n")
fmt.Printf("\r\n")
}

// searchFieldsFromString converts a comma delimited list of field tags
// (e.g. "245,650") into a slice with one entry per tag.
//
// Each entry is trimmed of surrounding whitespace and blank entries are
// dropped, so inputs like "245, 650" or "245,,650," produce ["245", "650"].
// The tags are later compared for exact equality against the tags of a record,
// so an untrimmed or empty entry could never match. A list made up only of
// empty entries would also be treated as a non-empty filter that matches
// nothing.
//
// An empty or blank input returns an empty (non-nil) slice, which callers
// interpret as "search all fields".
func searchFieldsFromString(searchFieldsString string) []string {
	values := []string{}
	for _, value := range strings.Split(searchFieldsString, ",") {
		// Validate each individual entry, not the input string as a whole.
		tag := strings.TrimSpace(value)
		if tag == "" {
			continue
		}
		values = append(values, tag)
	}
	return values
}
2 changes: 1 addition & 1 deletion cmd/marcli/mrc.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func toMrc(params ProcessFileParams) error {
continue
}

if r.Contains(params.searchValue) && r.HasFields(params.hasFields) {
if r.Contains(params.searchValue, params.searchFields) && r.HasFields(params.hasFields) {
fmt.Printf("%s", r.Raw())
if out++; out == count {
break
Expand Down
2 changes: 1 addition & 1 deletion cmd/marcli/mrk.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func toMrk(params ProcessFileParams) error {
continue
}

if r.Contains(params.searchValue) && r.HasFields(params.hasFields) {
if r.Contains(params.searchValue, params.searchFields) && r.HasFields(params.hasFields) {
str := ""
if params.filters.IncludeLeader() {
str += fmt.Sprintf("%s\r\n", r.Leader)
Expand Down
17 changes: 9 additions & 8 deletions cmd/marcli/processFileParams.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ import (
)

type ProcessFileParams struct {
filename string
searchValue string
filters marc.FieldFilters
exclude marc.FieldFilters
start int
count int
hasFields marc.FieldFilters
debug bool
filename string
searchValue string
searchFields []string
filters marc.FieldFilters
exclude marc.FieldFilters
start int
count int
hasFields marc.FieldFilters
debug bool
}

func (p ProcessFileParams) HasFilters() bool {
Expand Down
2 changes: 1 addition & 1 deletion cmd/marcli/solr.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ func toSolr(params ProcessFileParams) error {
if i++; i < start {
continue
}
if r.Contains(params.searchValue) && r.HasFields(params.hasFields) {
if r.Contains(params.searchValue, params.searchFields) && r.HasFields(params.hasFields) {
if out > 0 {
fmt.Printf(",\r\n")
} else {
Expand Down
2 changes: 1 addition & 1 deletion cmd/marcli/xml.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func toXML(params ProcessFileParams) error {
continue
}

if r.Contains(params.searchValue) && r.HasFields(params.hasFields) {
if r.Contains(params.searchValue, params.searchFields) && r.HasFields(params.hasFields) {
str, err := recordToXML(r, params)
if err != nil {
if params.debug {
Expand Down
27 changes: 25 additions & 2 deletions pkg/marc/record.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,25 @@ type Record struct {
}

// Contains returns true if Record contains the value passed.
func (r Record) Contains(searchValue string) bool {
// If searchFieldsList is empty it searches in all fields of the record,
// otherwise the search is limited to only the fields in the array.
func (r Record) Contains(searchValue string, searchFieldsList []string) bool {
if searchValue == "" {
return true
}
for _, field := range r.Fields {

var searchFields []Field
if len(searchFieldsList) == 0 {
searchFields = r.Fields
} else {
for _, field := range r.Fields {
if r.arrayContains(searchFieldsList, field.Tag) {
searchFields = append(searchFields, field)
}
}
}

for _, field := range searchFields {
if field.Contains(searchValue) {
return true
}
Expand Down Expand Up @@ -175,3 +189,12 @@ func (r Record) GetValues(tag string, subfield string) []string {
}
return values
}

// arrayContains reports whether value is equal to at least one element of
// array. The comparison is an exact string match; an empty or nil array
// always yields false.
func (r Record) arrayContains(array []string, value string) bool {
	found := false
	// Stop scanning as soon as a match has been seen.
	for i := 0; i < len(array) && !found; i++ {
		found = array[i] == value
	}
	return found
}

0 comments on commit ae5f33b

Please sign in to comment.