Skip to content

Commit

Permalink
Merge branch 'master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
abhishek9998 authored Aug 1, 2024
2 parents 8e6d47c + f14121f commit 7f519c2
Show file tree
Hide file tree
Showing 3,376 changed files with 208,914 additions and 303,802 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
2 changes: 1 addition & 1 deletion .devcontainer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ You can use the dev container configuration in this folder to build and run the
You may use the button above, or follow these steps to open this repo in a Codespace:
1. Click the **Code** drop-down menu at the top of https://github.com/langchain-ai/langchain.
1. Click on the **Codespaces** tab.
1. Click **Create codespace on master** .
1. Click **Create codespace on master**.

For more info, check out the [GitHub documentation](https://docs.github.com/en/free-pro-team@latest/github/developing-online-with-codespaces/creating-a-codespace#creating-a-codespace).

Expand Down
6 changes: 2 additions & 4 deletions .devcontainer/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ services:
dockerfile: libs/langchain/dev.Dockerfile
context: ..
volumes:
# Update this to wherever you want VS Code to mount the folder of your project
# Update this to wherever you want VS Code to mount the folder of your project
- ..:/workspaces/langchain:cached
networks:
- langchain-network
- langchain-network
# environment:
# MONGO_ROOT_USERNAME: root
# MONGO_ROOT_PASSWORD: example123
Expand All @@ -28,5 +28,3 @@ services:
networks:
langchain-network:
driver: bridge


3 changes: 0 additions & 3 deletions .github/ISSUE_TEMPLATE/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@ contact_links:
- name: 🤔 Question or Problem
about: Ask a question or ask about a problem in GitHub Discussions.
url: https://www.github.com/langchain-ai/langchain/discussions/categories/q-a
- name: Discord
url: https://discord.gg/6adMQxSpJS
about: General community discussions
- name: Feature Request
url: https://www.github.com/langchain-ai/langchain/discussions/categories/ideas
about: Suggest a feature or an idea
Expand Down
9 changes: 8 additions & 1 deletion .github/ISSUE_TEMPLATE/documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ body:
[LangChain Github Discussions](https://github.com/langchain-ai/langchain/discussions),
[LangChain Github Issues](https://github.com/langchain-ai/langchain/issues?q=is%3Aissue),
[LangChain ChatBot](https://chat.langchain.com/)
- type: input
id: url
attributes:
label: URL
description: URL to documentation
validations:
required: false
- type: checkboxes
id: checks
attributes:
Expand All @@ -48,4 +55,4 @@ body:
label: "Idea or request for content:"
description: >
Please describe as clearly as possible what topics you think are missing
from the current documentation.
from the current documentation.
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ Additional guidelines:
- Changes should be backwards compatible.
- If you are adding something to community, do not re-import it in langchain.

If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, hwchase17.
If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.
43 changes: 24 additions & 19 deletions .github/actions/people/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,11 +350,7 @@ def get_graphql_pr_edges(*, settings: Settings, after: Union[str, None] = None):
print("Querying PRs...")
else:
print(f"Querying PRs with cursor {after}...")
data = get_graphql_response(
settings=settings,
query=prs_query,
after=after
)
data = get_graphql_response(settings=settings, query=prs_query, after=after)
graphql_response = PRsResponse.model_validate(data)
return graphql_response.data.repository.pullRequests.edges

Expand Down Expand Up @@ -484,10 +480,16 @@ def get_contributors(settings: Settings):
lines_changed = pr.additions + pr.deletions
score = _logistic(files_changed, 20) + _logistic(lines_changed, 100)
contributor_scores[pr.author.login] += score
three_months_ago = (datetime.now(timezone.utc) - timedelta(days=3*30))
three_months_ago = datetime.now(timezone.utc) - timedelta(days=3 * 30)
if pr.createdAt > three_months_ago:
recent_contributor_scores[pr.author.login] += score
return contributors, contributor_scores, recent_contributor_scores, reviewers, authors
return (
contributors,
contributor_scores,
recent_contributor_scores,
reviewers,
authors,
)


def get_top_users(
Expand Down Expand Up @@ -524,9 +526,13 @@ def get_top_users(
# question_commentors, question_last_month_commentors, question_authors = get_experts(
# settings=settings
# )
contributors, contributor_scores, recent_contributor_scores, reviewers, pr_authors = get_contributors(
settings=settings
)
(
contributors,
contributor_scores,
recent_contributor_scores,
reviewers,
pr_authors,
) = get_contributors(settings=settings)
# authors = {**question_authors, **pr_authors}
authors = {**pr_authors}
maintainers_logins = {
Expand All @@ -537,14 +543,17 @@ def get_top_users(
"nfcampos",
"efriis",
"eyurtsev",
"rlancemartin"
"rlancemartin",
"ccurme",
"vbarda",
}
hidden_logins = {
"dev2049",
"vowelparrot",
"obi1kenobi",
"langchain-infra",
"jacoblee93",
"isahers1",
"dqbd",
"bracesproul",
"akira",
Expand All @@ -556,7 +565,7 @@ def get_top_users(
maintainers.append(
{
"login": login,
"count": contributors[login], #+ question_commentors[login],
"count": contributors[login], # + question_commentors[login],
"avatarUrl": user.avatarUrl,
"twitterUsername": user.twitterUsername,
"url": user.url,
Expand Down Expand Up @@ -612,9 +621,7 @@ def get_top_users(
new_people_content = yaml.dump(
people, sort_keys=False, width=200, allow_unicode=True
)
if (
people_old_content == new_people_content
):
if people_old_content == new_people_content:
logging.info("The LangChain People data hasn't changed, finishing.")
sys.exit(0)
people_path.write_text(new_people_content, encoding="utf-8")
Expand All @@ -627,9 +634,7 @@ def get_top_users(
logging.info(f"Creating a new branch {branch_name}")
subprocess.run(["git", "checkout", "-B", branch_name], check=True)
logging.info("Adding updated file")
subprocess.run(
["git", "add", str(people_path)], check=True
)
subprocess.run(["git", "add", str(people_path)], check=True)
logging.info("Committing updated file")
message = "👥 Update LangChain people data"
result = subprocess.run(["git", "commit", "-m", message], check=True)
Expand All @@ -638,4 +643,4 @@ def get_top_users(
logging.info("Creating PR")
pr = repo.create_pull(title=message, body=message, base="master", head=branch_name)
logging.info(f"Created PR: {pr.number}")
logging.info("Finished")
logging.info("Finished")
163 changes: 148 additions & 15 deletions .github/scripts/check_diff.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,137 @@
import glob
import json
import sys
import os
from typing import Dict
import re
import sys
import tomllib
from collections import defaultdict
from typing import Dict, List, Set
from pathlib import Path


LANGCHAIN_DIRS = [
"libs/core",
"libs/text-splitters",
"libs/community",
"libs/langchain",
"libs/community",
"libs/experimental",
]


def all_package_dirs() -> Set[str]:
return {
"/".join(path.split("/")[:-1]).lstrip("./")
for path in glob.glob("./libs/**/pyproject.toml", recursive=True)
if "libs/cli" not in path and "libs/standard-tests" not in path
}


def dependents_graph() -> dict:
"""
Construct a mapping of package -> dependents, such that we can
run tests on all dependents of a package when a change is made.
"""
dependents = defaultdict(set)

for path in glob.glob("./libs/**/pyproject.toml", recursive=True):
if "template" in path:
continue

# load regular and test deps from pyproject.toml
with open(path, "rb") as f:
pyproject = tomllib.load(f)["tool"]["poetry"]

pkg_dir = "libs" + "/".join(path.split("libs")[1].split("/")[:-1])
for dep in [
*pyproject["dependencies"].keys(),
*pyproject["group"]["test"]["dependencies"].keys(),
]:
if "langchain" in dep:
dependents[dep].add(pkg_dir)
continue

# load extended deps from extended_testing_deps.txt
package_path = Path(path).parent
extended_requirement_path = package_path / "extended_testing_deps.txt"
if extended_requirement_path.exists():
with open(extended_requirement_path, "r") as f:
extended_deps = f.read().splitlines()
for depline in extended_deps:
if depline.startswith("-e "):
# editable dependency
assert depline.startswith(
"-e ../partners/"
), "Extended test deps should only editable install partner packages"
partner = depline.split("partners/")[1]
dep = f"langchain-{partner}"
else:
dep = depline.split("==")[0]

if "langchain" in dep:
dependents[dep].add(pkg_dir)
return dependents


def add_dependents(dirs_to_eval: Set[str], dependents: dict) -> List[str]:
updated = set()
for dir_ in dirs_to_eval:
# handle core manually because it has so many dependents
if "core" in dir_:
updated.add(dir_)
continue
pkg = "langchain-" + dir_.split("/")[-1]
updated.update(dependents[pkg])
updated.add(dir_)
return list(updated)


def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
min_python = "3.8"
max_python = "3.12"

# custom logic for specific directories
if dir_ == "libs/partners/milvus":
# milvus poetry doesn't allow 3.12 because they
# declare deps in funny way
max_python = "3.11"

if dir_ in ["libs/community", "libs/langchain"] and job == "extended-tests":
# community extended test resolution in 3.12 is slow
# even in uv
max_python = "3.11"

if dir_ == "libs/community" and job == "compile-integration-tests":
# community integration deps are slow in 3.12
max_python = "3.11"

return [
{"working-directory": dir_, "python-version": min_python},
{"working-directory": dir_, "python-version": max_python},
]


def _get_configs_for_multi_dirs(
job: str, dirs_to_run: List[str], dependents: dict
) -> List[Dict[str, str]]:
if job == "lint":
dirs = add_dependents(
dirs_to_run["lint"] | dirs_to_run["test"] | dirs_to_run["extended-test"],
dependents,
)
elif job in ["test", "compile-integration-tests", "dependencies"]:
dirs = add_dependents(
dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
)
elif job == "extended-tests":
dirs = list(dirs_to_run["extended-test"])
else:
raise ValueError(f"Unknown job: {job}")

return [
config for dir_ in dirs for config in _get_configs_for_single_dir(job, dir_)
]


if __name__ == "__main__":
files = sys.argv[1:]

Expand All @@ -21,10 +142,11 @@
}
docs_edited = False

if len(files) == 300:
if len(files) >= 300:
# max diff length is 300 files - there are likely files missing
raise ValueError("Max diff reached. Please manually run CI on changed libs.")

dirs_to_run["lint"] = all_package_dirs()
dirs_to_run["test"] = all_package_dirs()
dirs_to_run["extended-test"] = set(LANGCHAIN_DIRS)
for file in files:
if any(
file.startswith(dir_)
Expand Down Expand Up @@ -81,14 +203,25 @@
docs_edited = True
dirs_to_run["lint"].add(".")

outputs = {
"dirs-to-lint": list(
dirs_to_run["lint"] | dirs_to_run["test"] | dirs_to_run["extended-test"]
),
"dirs-to-test": list(dirs_to_run["test"] | dirs_to_run["extended-test"]),
"dirs-to-extended-test": list(dirs_to_run["extended-test"]),
"docs-edited": "true" if docs_edited else "",
dependents = dependents_graph()

# we now have dirs_by_job
# todo: clean this up

map_job_to_configs = {
job: _get_configs_for_multi_dirs(job, dirs_to_run, dependents)
for job in [
"lint",
"test",
"extended-tests",
"compile-integration-tests",
"dependencies",
]
}
for key, value in outputs.items():
map_job_to_configs["test-doc-imports"] = (
[{"python-version": "3.12"}] if docs_edited else []
)

for key, value in map_job_to_configs.items():
json_output = json.dumps(value)
print(f"{key}={json_output}") # noqa: T201
print(f"{key}={json_output}")
35 changes: 35 additions & 0 deletions .github/scripts/check_prerelease_dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import sys
import tomllib

if __name__ == "__main__":
# Get the TOML file path from the command line argument
toml_file = sys.argv[1]

# read toml file
with open(toml_file, "rb") as file:
toml_data = tomllib.load(file)

# see if we're releasing an rc
version = toml_data["tool"]["poetry"]["version"]
releasing_rc = "rc" in version

# if not, iterate through dependencies and make sure none allow prereleases
if not releasing_rc:
dependencies = toml_data["tool"]["poetry"]["dependencies"]
for lib in dependencies:
dep_version = dependencies[lib]
dep_version_string = (
dep_version["version"] if isinstance(dep_version, dict) else dep_version
)

if "rc" in dep_version_string:
raise ValueError(
f"Dependency {lib} has a prerelease version. Please remove this."
)

if isinstance(dep_version, dict) and dep_version.get(
"allow-prereleases", False
):
raise ValueError(
f"Dependency {lib} has allow-prereleases set to true. Please remove this."
)
Loading

0 comments on commit 7f519c2

Please sign in to comment.