/*
** data.h
** Shared data structures, global state and function prototypes.
*/
#ifndef DATA_H_
# define DATA_H_
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <unistd.h>
#include <getopt.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#include <obstack.h>
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
/*
** Run-time switches, each stored as a one-bit flag.
** NOTE(review): presumably filled in from the command line (this header
** pulls in <getopt.h>) -- confirm against the option parser.
*/
struct sedOptions {
    unsigned silent: 1;
    unsigned follow_symlinks: 1;
    unsigned in_place: 1;
    unsigned null_data: 1;
    unsigned separate: 1;
    unsigned extended_regex_syntax: 1;
};

/*
** Global option set.
** NOTE(review): this is a (tentative) definition inside a header, so every
** file that includes data.h defines the object; linking several such files
** relies on common-symbol merging (it breaks under -fno-common).
** Consider 'extern' here plus a single definition in one .c file.
*/
struct sedOptions g_sedOptions;
/*
** Line bookkeeping while a script runs.
** NOTE(review): 'current' is presumably the current input line number --
** confirm against the executor.
*/
struct sedRuntime {
    int current;
    int lookahead;            // Resolve addresses like '$' and '-5'
    struct vbuf *str_output;  // If writing to a string
};

/*
** Global instance of the runtime bookkeeping.
** NOTE(review): defined (not just declared) in the header -- see whether an
** 'extern' declaration plus one definition in a .c file is intended.
*/
struct sedRuntime g_lineInfo;
/*
** Buffered stream
*/

/*
** One source feeding a zbuf; sources are chained through 'next'.
** NOTE(review): 'buf'/'alloc' look like a buffer and its allocated size,
** and 'filename'/'file' like the backing file -- confirm in the reader.
*/
struct zbuflist {
    char *buf;
    size_t alloc;
    char *filename;
    FILE *file;
    struct zbuflist *next;  // when multiple -f, -e scripts
};

struct zbuf {
    int line;
    char *cursor;  // moved backwards by prevChar()
    char last;
    struct zbuflist *info;
};

/*
** Global script input stream.
** NOTE(review): defined (not just declared) in the header -- see whether an
** 'extern' declaration plus one definition in a .c file is intended.
*/
struct zbuf g_in;
/*
** Group all globals
** note: currently unused
*/
struct sedState {
    const char *progname;
    struct sedOptions opts;
    struct sedRuntime run;
    struct zbuf script_in;
};

/*
** NOTE(review): defined (not just declared) in the header, under the very
** generic name 'state' -- confirm nothing else at file scope clashes.
*/
struct sedState state;
/*
** Script character access.
** NOTE(review): nextChar() and nextProgStream() are declared with an empty
** parameter list, i.e. without a prototype, so calls are not
** argument-checked. Switch to (void) once the definitions are confirmed to
** take no arguments.
*/
char nextChar();
/*
** prevChar(c): move g_in.cursor back one position and yield the character
** it then points at. The argument is accepted but ignored.
** The expansion is fully parenthesized so it always acts as one operand,
** even next to postfix operators.
*/
#define prevChar(c) (*--g_in.cursor)
char nextProgStream();
/*
** vbuf_init(text): give the vbuf pointed to by 'text' a fresh 256-byte
** buffer obtained from xmalloc, record that size in 'alloc', and set 'len'
** to 0.
** Wrapped in do { } while (0) so both assignments form a single statement
** (safe under an unbraced if/else); call sites end with a semicolon.
** 'text' is evaluated more than once -- pass a side-effect-free expression.
*/
#define vbuf_init(text) \
    do { \
        (text)->buf = xmalloc((text)->alloc = 256); \
        (text)->len = 0; \
    } while (0)
/*
** Vector buffer: grows by xrealloc.
** vbuf_init() sets 'alloc' to the size requested for 'buf' and 'len' to 0.
*/
struct vbuf {
    char *buf;
    ssize_t len;
    ssize_t alloc;
};
/*
** struct sedLine:
** Storage for the pattern space and the hold space. The data is kept
** contiguous in memory because regexec must sometimes be run over the
** entire region.
** todo: line sizes should be affected by number of N,G,H cmds
*/
struct sedLine {
    char *buf;
    size_t alloc;
    char *active;  // NOTE(review): presumably points into buf -- confirm
    size_t len;
    bool chomped;  // almost always true
};
/*
** ----------- Compiled Script datatypes -----------
*/
// Kind of address stored in a struct sedAddr.
enum sedAddrType {
    ADDR_NONE,   // for cmd addresses like '1,' and ',9'
    ADDR_LINE,
    ADDR_REGEX,
};

// A single address (details for sedCmdAddr).
struct sedAddr {
    enum sedAddrType type;
    union {
        int line;       // presumably valid when type == ADDR_LINE -- confirm
        regex_t regex;  // presumably valid when type == ADDR_REGEX -- confirm
    } info;
};
// Which of a1, a2 and step a sedCmdAddr makes use of.
enum sedCmdAddrType {
    CMD_ADDR_DONE,   // address will never match again
    CMD_ADDR_LINE,   // use only a1
    CMD_ADDR_RANGE,  // use a1, a2
    CMD_ADDR_STEP,   // use a1, a2, step
};

// Address part of a command.
struct sedCmdAddr {
    enum sedCmdAddrType type;
    unsigned bang: 1;  // '!'
    struct sedAddr a1, a2;
    size_t step;
};
/*
** One compiled command.
** NOTE(review): 'cmdChar' presumably holds the command letter and thereby
** tells which member of 'info' is in use (see the letters noted on each
** member) -- confirm in the compiler/executor.
*/
struct sedCmd {
    struct sedCmdAddr *addr;  // NULL matches unconditionally
    char cmdChar;
    union {
        struct SCmd *s;
        char *y;
        struct vbuf *text;       // a, i, c
        int int_arg;             // q, Q
        struct {
            FILE *file;
            const char *filename;
        } file;                  // w, W, r, R
        struct sedProgram *jmp;  // b, t, T
    } info;
};
/*
** A sedProgram is a circular list of sedCmds stored in an obstack.
** The obstack is used to reduce page faults and fragmentation, to make
** deleting cmds quick, and to lower malloc time/memory overhead.
** The first entry contains no cmd info.
*/
struct sedProgram {
    struct sedProgram *next;
    struct sedCmd cmd;
};
/*
** ----------- S command -----------
** The replacement is generated by concatenating the entries of 'recipe':
**   - values 0 to 9 are backreferences,
**   - (char *) -1 ends the recipe,
**   - any other value points somewhere in 'text'.
** Note: will using these low addresses ever cause a problem ?
** If malloc() ever returned something < 10, the pointer would be taken for
** a backreference and sed would probably segfault while trying to access
** the associated backref.
*/
struct SReplacement {
    char *text;     // private reference string
    size_t n_refs;
    char **recipe;
};
/*
** Compiled form of one s command: the pattern, its replacement, and options.
** Note: the member named 'new' is a C++ keyword, so this header can only be
** compiled as C.
*/
struct SCmd {
    regex_t pattern;
    struct SReplacement new;

    // - options -
    unsigned g: 1;
    unsigned p: 1;
    unsigned e: 1;
    unsigned d: 1;  // if 'p' given before 'e'
    FILE *w;
    int number;
    // 'i' and 'm' are passed to regcomp()
};
/*
** ----------- Function Prototypes -----------
*/
/*
** NOTE(review): several declarations below use an empty parameter list,
** i.e. they are not prototypes and calls to them are not argument-checked.
** Switch them to (void) once the definitions are confirmed to take no
** arguments.
*/

// x-prefixed wrappers (presumably around malloc, realloc, fopen and
// regcomp -- confirm error behaviour in the implementation).
void *xmalloc(size_t len);
void *xrealloc(void *ptr, size_t len);
FILE *xfopen(const char *f_name, const char *mode);
int xregcomp(regex_t *preg, const char *pattern, int cflags);

// vbuf construction and script-text readers.
struct vbuf *vbuf_new();
ssize_t vbuf_getline(struct vbuf *text, FILE *in);
struct vbuf *read_text();
struct vbuf *snarf(char delim, char regex);
struct vbuf *vbuf_readName();
char *vbuf_tostring(struct vbuf *text);

// sedLine operations.
struct sedLine *sedLine_new();
void sedLine_appendText(struct sedLine *l, char *text, int len);
void sedLine_deleteEmbeddedLine(struct sedLine *l, FILE *out);
void sedLine_appendLineList(struct sedLine *line, struct sedLine *ap);
void sedLine_deleteLine(struct sedLine *l, FILE *out);

// Script compilation and execution.
struct sedProgram *compile_file(struct sedProgram *compile, const char *f_name);
struct sedProgram *compile_string(struct sedProgram *compile, char *str);
char exec_stream(struct sedProgram *prog, char **files);
#endif // ifndef DATA_H_