Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clustur patch fixes windows and names #55

Merged
merged 3 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: clustur
Type: Package
Title: Clustering
Version: 0.1.1
Version: 0.1.2
Date: 2024-11-25
Authors@R: c(
person("Gregory", "Johnson", , "[email protected]", role = c("aut"),
Expand Down
2 changes: 1 addition & 1 deletion src/MothurDependencies/SharedFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class SharedFile {
explicit SharedFile(const std::vector<SharedAbundance>& otherTidySharedList)
: tidySharedList(otherTidySharedList) {
}
Rcpp::DataFrame PrintData() const;
Rcpp::DataFrame PrintData(const std::string &binName) const;
private:
std::vector<SharedAbundance> tidySharedList;
};
Expand Down
2 changes: 1 addition & 1 deletion src/MothurDependencies/SharedFileBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
class SharedFileBuilder {
public:
SharedFile *BuildSharedFile(const ListVector &listVector,
const CountTableAdapter& countTable);
const CountTableAdapter& countTable, const std::string &binName);
SharedFileBuilder() = default;
private:
struct SampleInformation {
Expand Down
4 changes: 2 additions & 2 deletions src/SharedFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "MothurDependencies/ClusterExport.h"


Rcpp::DataFrame SharedFile::PrintData() const {
Rcpp::DataFrame SharedFile::PrintData(const std::string &binName) const {
const size_t size = tidySharedList.size();
std::vector<std::string> groups(size);
std::vector<std::string> otus(size);
Expand All @@ -18,6 +18,6 @@ Rcpp::DataFrame SharedFile::PrintData() const {
abundanceList[count++] = abundances.groupAbundance;
}
return Rcpp::DataFrame::create(Rcpp::Named("samples") = groups,
Rcpp::Named("otu") = otus,
Rcpp::Named(binName) = otus,
Rcpp::Named("abundance") = abundanceList);
}
4 changes: 2 additions & 2 deletions src/SharedFileBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// TODO Comment this code
// TODO We may need to build a traditional file builder...So we can output a dataframe of how the clusters are (list)
SharedFile* SharedFileBuilder::BuildSharedFile(const ListVector &listVector,
const CountTableAdapter &countTable) {
const CountTableAdapter &countTable, const std::string &binName) {
Utils utils;
std::string largestCutoffLabel = listVector.getLabel();
std::vector<SharedAbundance> abundancesList;
Expand All @@ -20,7 +20,7 @@ SharedFile* SharedFileBuilder::BuildSharedFile(const ListVector &listVector,
if(samples.empty())
continue;
std::vector<std::string> splitSamples;
std::string otuName = "otu" + std::to_string(count++);
std::string otuName = binName + std::to_string(count++);
utils.splitAtComma(samples, splitSamples);
std::unordered_map<std::string, double> totalAbundanceInEachGroup;
for(const auto& sample : splitSamples) {
Expand Down
2 changes: 1 addition & 1 deletion src/SharedFileBuilderTestFixture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
bool SharedFileBuilderTestFixture::TestBuildSharedFile(const ListVector& listVector,
const CountTableAdapter &countTable, const bool expectedResult) {
Setup();
const SharedFile* file = builder->BuildSharedFile(listVector, countTable);
const SharedFile* file = builder->BuildSharedFile(listVector, countTable, "otu");
TearDown();
return expectedResult == (file != nullptr);

Expand Down
2 changes: 1 addition & 1 deletion src/SharedFileTestFixture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ bool SharedFileTestFixture::TestSharedFilePrintData(const std::vector<SharedAbun
const Rcpp::DataFrame &expectedResult) {
Setup();
sharedFile = new SharedFile(data);
Rcpp::DataFrame df = sharedFile->PrintData();
Rcpp::DataFrame df = sharedFile->PrintData("otu");
const std::vector<std::string> columnNames = df.names();
const std::vector<std::string> expectedNames = expectedResult.names();
TearDown();
Expand Down
17 changes: 10 additions & 7 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "Adapters/OptimatrixAdapter.h"
#include "Adapters/MatrixAdapter.h"
#include "MothurDependencies/ClusterCommand.h"
#include "MothurDependencies/ListVector.h"
#include "MothurDependencies/OptiMatrix.h"
#include "Adapters/CountTableAdapter.h"
#include "MothurDependencies/ColumnDistanceMatrixReader.h"
Expand All @@ -13,13 +14,14 @@
#include <cctype>


Rcpp::DataFrame CreateSharedDataFrame(const CountTableAdapter& countTable, const ClusterExport* result) {
Rcpp::DataFrame CreateSharedDataFrame(const CountTableAdapter& countTable, const ClusterExport* result,
const std::string& binName) {
SharedFileBuilder builder;
std::unordered_map<std::string, RAbundVector> map;
std::unordered_map<std::string, ListVector> listMap;
const ListVectorPair listVectors = result->GetListVector();
const SharedFile* sharedFile = builder.BuildSharedFile(*listVectors.listVector, countTable);
Rcpp::DataFrame tidySharedDataFrame = sharedFile->PrintData();
const SharedFile* sharedFile = builder.BuildSharedFile(*listVectors.listVector, countTable, binName);
Rcpp::DataFrame tidySharedDataFrame = sharedFile->PrintData(binName);
delete(sharedFile);
return tidySharedDataFrame;
}
Expand Down Expand Up @@ -69,8 +71,9 @@ SEXP ProcessSparseMatrix(const std::vector<int> &xPosition,
CountTableAdapter countTableAdapter;
countTableAdapter.CreateDataFrameMap(countTable);
MatrixAdapter adapter(xPosition, yPosition, data, cutoff, isSim, countTableAdapter);
auto* read = new DistanceFileReader(new SparseDistanceMatrix(adapter.CreateSparseMatrix()),
new ListVector(adapter.CreateListVector()), cutoff, isSim);
auto* sparseDistanceMatrix = new SparseDistanceMatrix(adapter.CreateSparseMatrix());
auto* listVec = new ListVector(adapter.CreateListVector());
auto* read = new DistanceFileReader(sparseDistanceMatrix,listVec,cutoff, isSim);
read->CreateCountTableAdapter(countTable);
return Rcpp::XPtr<DistanceFileReader>(read);
}
Expand Down Expand Up @@ -103,7 +106,7 @@ Rcpp::List Cluster(const SEXP& DistanceData,const std::string& method, const std
const auto label = result->GetListVector().label;
const Rcpp::DataFrame clusterDataFrame = result->GetListVector().listVector->CreateDataFrameFromList(
featureColumnName, binColumnName);
const Rcpp::DataFrame tidySharedDataFrame = CreateSharedDataFrame(countTableAdapter, result);
const Rcpp::DataFrame tidySharedDataFrame = CreateSharedDataFrame(countTableAdapter, result, binColumnName);
delete(result);
delete(listVector);
delete(sparseMatrix);
Expand All @@ -129,7 +132,7 @@ Rcpp::List OptiCluster(const SEXP& DistanceData, const std::string& featureColum
const auto label = result->GetListVector().label;
const Rcpp::DataFrame clusterDataFrame = result->GetListVector().listVector->CreateDataFrameFromList(
featureColumnName, binColumnName);
const Rcpp::DataFrame tidySharedDataFrame = CreateSharedDataFrame(countTableAdapter, result);
const Rcpp::DataFrame tidySharedDataFrame = CreateSharedDataFrame(countTableAdapter, result, binColumnName);
delete(result);
return Rcpp::List::create(Rcpp::Named("label") = std::stod(label),
Rcpp::Named("abundance") = tidySharedDataFrame,
Expand Down
1 change: 0 additions & 1 deletion vignettes/clustur.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ cluster_data <- cluster(column_distance, cutoff, method = "weighted")

## Output data from clustering

#### edit this paragraph further...
All methods produce a list object with an indicator of the cutoff that was used
(`label`), as well as cluster composition (`cluster`) and shared (`abundance`) data frames.
The `clusters` data frame shows which OTU (Operational Taxonomic Unit) each sequence was assigned to. The `abundance` data frame
Expand Down
Loading