Merge pull request #1 from hectorcorrea/newreader
Refactor
hectorcorrea authored Oct 16, 2019
2 parents 1ddc7b8 + cc27e99 commit 81094ab
Showing 21 changed files with 776 additions and 915 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -0,0 +1,3 @@
/tmp/*
/brown/*
.DS_Store
6 changes: 5 additions & 1 deletion README.md
@@ -33,6 +33,10 @@ A letter (or letters) after the field tag indicates that only those
subfields should be output. For example, "907xz" means output subfields "x"
and "z" in field "907".

The program supports a `format` parameter to output formats other than MARC line delimited (MRK), such as JSON or MARC binary. Note that not all features are available in all formats yet.

You can also pass `start` and `count` parameters to output only a range of MARC records.


## Sample data
Files under `./data/` are small MARC files that I use for testing.
@@ -49,7 +53,7 @@ Download the code and play with it:
git clone https://github.com/hectorcorrea/marcli.git
cd marcli
go build
./marcli -file data/test_1a.mrc
```


41 changes: 0 additions & 41 deletions dirEntry.go

This file was deleted.

64 changes: 64 additions & 0 deletions export/json.go
@@ -0,0 +1,64 @@
package export

import (
"encoding/json"
"errors"
"fmt"
"io"
"marcli/marc"
"os"
)

// TODO: Add support for JSONL (JSON line delimited) format that makes JSON
// easier to parse with Unix tools like grep, tail, and so on.
func ToJson(filename string, searchValue string, filters marc.FieldFilters, start int, count int) error {
if len(filters.Fields) > 0 {
return errors.New("filters not supported for this format")
}

if count == 0 {
return nil
}

file, err := os.Open(filename)
if err != nil {
return err
}
defer file.Close()

var i, out int
marc := marc.NewMarcFile(file)

fmt.Printf("[")
for marc.Scan() {
r, err := marc.Record()
if err == io.EOF {
break
}
if err != nil {
return err
}
if i++; i < start {
continue
}
if r.Contains(searchValue) {
if out > 0 {
fmt.Printf(",\r\n")
} else {
fmt.Printf("\r\n")
}
b, err := json.Marshal(r.Filter(filters))
if err != nil {
fmt.Printf("%s\r\n", err)
}
// fmt.Printf("{ \"record\": %s}\r\n", b)
fmt.Printf("%s", b)
if out++; out == count {
break
}
}
}
fmt.Printf("\r\n]\r\n")

return marc.Err()
}
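
The TODO at the top of this file mentions a JSONL (JSON line delimited) variant. Below is a minimal sketch of what that could look like, reusing the same scan/skip/emit pattern as `ToJson` above; the function name `ToJsonL` is an assumption and is not part of this commit.

```
package export

import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"marcli/marc"
	"os"
)

// ToJsonL is a sketch of a JSON line delimited exporter: one record per
// line, with no surrounding array and no commas between records.
func ToJsonL(filename string, searchValue string, filters marc.FieldFilters, start int, count int) error {
	if len(filters.Fields) > 0 {
		return errors.New("filters not supported for this format")
	}

	if count == 0 {
		return nil
	}

	file, err := os.Open(filename)
	if err != nil {
		return err
	}
	defer file.Close()

	var i, out int
	records := marc.NewMarcFile(file)
	for records.Scan() {
		r, err := records.Record()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		if i++; i < start {
			continue
		}
		if r.Contains(searchValue) {
			b, err := json.Marshal(r.Filter(filters))
			if err != nil {
				return err
			}
			// One JSON object per line, no "[", "]", or "," so the output
			// can be processed with Unix tools like grep, tail, or jq.
			fmt.Printf("%s\n", b)
			if out++; out == count {
				break
			}
		}
	}
	return records.Err()
}
```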
49 changes: 49 additions & 0 deletions export/mrc.go
@@ -0,0 +1,49 @@
package export

import (
"errors"
"fmt"
"io"
"marcli/marc"
"os"
)

func ToMrc(filename string, searchValue string, filters marc.FieldFilters, start int, count int) error {
if len(filters.Fields) > 0 {
return errors.New("filters not supported for this format")
}

if count == 0 {
return nil
}

file, err := os.Open(filename)
if err != nil {
return err
}
defer file.Close()

var i, out int
marc := marc.NewMarcFile(file)
for marc.Scan() {
r, err := marc.Record()
if err == io.EOF {
break
}
if err != nil {
return err
}

if i++; i < start {
continue
}

if r.Contains(searchValue) {
fmt.Printf("%s", r.Raw())
if out++; out == count {
break
}
}
}
return marc.Err()
}
55 changes: 55 additions & 0 deletions export/mrk.go
@@ -0,0 +1,55 @@
package export

import (
"fmt"
"io"
"marcli/marc"
"os"
)

func ToMrk(filename string, searchValue string, filters marc.FieldFilters, start int, count int) error {
if count == 0 {
return nil
}

file, err := os.Open(filename)
if err != nil {
return err
}
defer file.Close()

var i, out int
marc := marc.NewMarcFile(file)
for marc.Scan() {

r, err := marc.Record()
if err == io.EOF {
break
}
if err != nil {
return err
}

if i++; i < start {
continue
}

if r.Contains(searchValue) {
str := ""
if filters.IncludeLeader() {
str += fmt.Sprintf("%s\r\n", r.Leader)
}
for _, field := range r.Filter(filters) {
str += fmt.Sprintf("%s\r\n", field)
}
if str != "" {
fmt.Printf("%s\r\n", str)
if out++; out == count {
break
}
}
}
}

return marc.Err()
}
152 changes: 152 additions & 0 deletions export/solr.go
@@ -0,0 +1,152 @@
package export

import (
"encoding/json"
"errors"
"fmt"
"io"
"marcli/marc"
"os"
"strings"
)

type SolrDocument struct {
Id string `json:"id"`
Author string `json:"author_txt_en,omitempty"`
AuthorDate string `json:"authorDate_s,omitempty"`
AuthorFuller string `json:"authorFuller_txt_en,omitempty"`
AuthorsOther []string `json:"authorsOther_txts_en,omitempty"`
Title string `json:"title_txt_en,omitempty"`
Responsibility string `json:"responsibility_txt_en,omitempty"`
Publisher string `json:"publisher_txt_en,omitempty"`
Urls []string `json:"urls_ss,omitempty"`
Subjects []string `json:"subjects_txts_en,omitempty"`
SubjectsForm []string `json:"subjectsForm_txts_en,omitempty"`
SubjectsGeneral []string `json:"subjectsGeneral_txts_en,omitempty"`
SubjectsChrono []string `json:"subjectsChrono_txts_en,omitempty"`
SubjectsGeo []string `json:"subjectsGeo_txts_en,omitempty"`
}

func NewSolrDocument(r marc.Record) SolrDocument {
doc := SolrDocument{}
id := r.GetValue("001", "")
if id == "" {
id = "INVALID"
}
doc.Id = strings.TrimSpace(id)
author := r.GetValue("100", "a")
if author != "" {
doc.Author = author
doc.AuthorDate = r.GetValue("100", "d")
doc.AuthorFuller = r.GetValue("100", "q")
} else {
doc.Author = r.GetValue("110", "a")
doc.AuthorDate = ""
doc.AuthorFuller = ""
}
doc.AuthorsOther = r.GetValues("700", "a")

titleA := r.GetValue("245", "a")
titleB := r.GetValue("245", "b")
titleC := r.GetValue("245", "c")
doc.Title = concat(titleA, titleB)
doc.Responsibility = titleC

doc.Publisher = r.GetValue("260", "a")
doc.Urls = r.GetValues("856", "u")
doc.Subjects = subjects(r, "a")
doc.SubjectsForm = subjects(r, "v")
doc.SubjectsGeneral = subjects(r, "x")
doc.SubjectsChrono = subjects(r, "y")
doc.SubjectsGeo = subjects(r, "z")
return doc
}

func ToSolr(filename string, searchValue string, filters marc.FieldFilters, start int, count int) error {
if len(filters.Fields) > 0 {
return errors.New("filters not supported for this format")
}

if count == 0 {
return nil
}

file, err := os.Open(filename)
if err != nil {
return err
}
defer file.Close()

var i, out int
marc := marc.NewMarcFile(file)

fmt.Printf("[")
for marc.Scan() {
r, err := marc.Record()
if err == io.EOF {
break
}
if err != nil {
return err
}
if i++; i < start {
continue
}
if r.Contains(searchValue) {
if out > 0 {
fmt.Printf(",\r\n")
} else {
fmt.Printf("\r\n")
}
doc := NewSolrDocument(r)
b, err := json.Marshal(doc)
if err != nil {
fmt.Printf("%s\r\n", err)
}
fmt.Printf("%s", b)
if out++; out == count {
break
}
}
}
fmt.Printf("\r\n]\r\n")

return marc.Err()
}

func subjects(r marc.Record, subfield string) []string {
var values []string
for _, fieldValue := range r.GetValues("650", subfield) {
values = append(values, trimPeriod(fieldValue))
}
return values
}

func concat(a, b string) string {
return _concat(a, b, " ")
}

func concatTab(a, b string) string {
return _concat(a, b, "\t")
}

func _concat(a, b, sep string) string {
if a == "" && b == "" {
return ""
} else if a == "" && b != "" {
return strings.TrimSpace(b)
} else if a != "" && b == "" {
return strings.TrimSpace(a)
}
return strings.TrimSpace(a) + sep + strings.TrimSpace(b)
}

func trimPeriod(s string) string {
if s == "" || s == "." {
return ""
}
if strings.HasSuffix(s, ".") {
return strings.TrimSpace(s[:len(s)-1])
}
return s
}
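
The README's `format`, `start`, and `count` parameters map naturally onto the four exporter functions added in this commit. Here is a minimal sketch of how a main program might dispatch on the format value; the `marcli/export` import path, the format names, and all values in `main` are assumptions (the real `main.go` is not shown in this diff).

```
package main

import (
	"log"

	"marcli/export"
	"marcli/marc"
)

// exportRecords picks an exporter based on the format value. The format
// names ("json", "mrc", "solr", default MRK) are assumptions inferred
// from the new files under export/.
func exportRecords(format, filename, searchValue string, filters marc.FieldFilters, start, count int) error {
	switch format {
	case "json":
		return export.ToJson(filename, searchValue, filters, start, count)
	case "mrc":
		return export.ToMrc(filename, searchValue, filters, start, count)
	case "solr":
		return export.ToSolr(filename, searchValue, filters, start, count)
	default:
		// MARC line delimited (MRK), the default output described in the README.
		return export.ToMrk(filename, searchValue, filters, start, count)
	}
}

func main() {
	// Hypothetical values: an empty FieldFilters is treated as "no field
	// filters" (len(filters.Fields) == 0), start 0 means "from the first
	// record", and count -1 means "no limit" (out never equals -1, so every
	// matching record is exported).
	err := exportRecords("json", "data/test_1a.mrc", "", marc.FieldFilters{}, 0, -1)
	if err != nil {
		log.Fatal(err)
	}
}
```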