Skip to content

Commit

Permalink
bluesentry#142 Detects encoding of clamav process output and decodes …
Browse files Browse the repository at this point in the history
…accordingly
  • Loading branch information
tohoku committed Jan 26, 2022
1 parent 0e86c59 commit 9ef9c91
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion clamav.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import chardet
import datetime
import hashlib
import os
Expand Down Expand Up @@ -183,6 +184,14 @@ def scan_output_to_json(output):
summary[key] = value.strip()
return summary

def detect_encoding(line):
    """Return the most likely character encoding of ``line``, or None.

    The input is passed to ``chardet.detect`` and the full detection result
    is printed for diagnostics. The detected encoding is returned only when
    the reported confidence is strictly greater than 0.8; otherwise None is
    returned so the caller can choose its own fallback encoding.

    Args:
        line: The raw data to inspect (presumably the bytes captured from
            the clamscan process; confirm against the caller).

    Returns:
        The value stored under the ``'encoding'`` key of the detection
        result when confidence exceeds 0.8, else None.
    """
    chardet_encoding = chardet.detect(line)
    print("Most likely encoding: %s" % chardet_encoding)
    # Only trust the guess when the detector is reasonably sure; a
    # low-confidence guess is reported as "unknown" instead.
    if chardet_encoding['confidence'] > 0.8:
        return chardet_encoding['encoding']
    return None

def scan_file(path):
av_env = os.environ.copy()
Expand All @@ -194,7 +203,9 @@ def scan_file(path):
stdout=subprocess.PIPE,
env=av_env,
)
output = av_proc.communicate()[0].decode()
result = av_proc.communicate()[0]
result_encoding = detect_encoding(result)
output = result.decode(result_encoding) if result_encoding is not None else result.decode('utf-8')
print("clamscan output:\n%s" % output)

# Turn the output into a data source we can read
Expand Down

0 comments on commit 9ef9c91

Please sign in to comment.