add files

2024-12-04 16:52:06 +07:00
parent 7b455521c6
commit ff4fdee995
6 changed files with 1411 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,175 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 cover/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 .pybuilder/
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 # pyenv
 #   For a library or package, you might want to ignore these files since the code is
 #   intended to run in multiple environments; otherwise, check them in:
 # .python-version
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 #   install all needed dependencies.
 #Pipfile.lock
 # poetry
 #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
 #poetry.lock
 # pdm
 #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
 #pdm.lock
 #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 #   in version control.
 #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
 .pdm.toml
 .pdm-python
 .pdm-build/
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # pytype static type analyzer
 .pytype/
 # Cython debug symbols
 cython_debug/
 # PyCharm
 #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 .idea/
 # gitignore template for Jupyter Notebooks
 # website: http://jupyter.org/
 .ipynb_checkpoints
 */.ipynb_checkpoints/*
 # IPython
 profile_default/
 ipython_config.py
 # Remove previous ipynb_checkpoints
 #   git rm -r .ipynb_checkpoints/
--- a/.jupyter/lab/workspaces/default-37a8.jupyterlab-workspace
+++ b/.jupyter/lab/workspaces/default-37a8.jupyterlab-workspace
@@ -0,0 +1 @@
 {"data":{"layout-restorer:data":{"main":{"dock":{"type":"tab-area","currentIndex":2,"widgets":["notebook:analisis.ipynb","csvviewer:data/articles.csv"]},"current":"csvviewer:data/articles.csv"},"down":{"size":0,"widgets":[]},"left":{"collapsed":false,"visible":true,"current":"filebrowser","widgets":["filebrowser","running-sessions","git-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"],"widgetStates":{"jp-running-sessions":{"sizes":[0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666],"expansionStates":[false,false,false,false,false,false]},"extensionmanager.main-view":{"sizes":[0.5,0.5],"expansionStates":[false,false]}}},"right":{"collapsed":true,"visible":true,"widgets":["jp-property-inspector","debugger-sidebar"],"widgetStates":{"jp-debugger-sidebar":{"sizes":[0.2,0.2,0.2,0.2,0.2],"expansionStates":[false,false,false,false,false]}}},"relativeSizes":[0.1663353214049039,0.8336646785950961,0],"top":{"simpleVisibility":true}},"docmanager:recents":{"opened":[{"path":"data","contentType":"directory","root":"/home/jovyan"},{"path":"data/articles.csv","contentType":"file","factory":"CSVTable","root":"/home/jovyan"},{"path":"","contentType":"directory","root":"/home/jovyan"},{"path":"analisis.ipynb","contentType":"notebook","factory":"Notebook","root":"/home/jovyan"}],"closed":[]},"file-browser-filebrowser:cwd":{"path":"data"},"csvviewer:data/articles.csv":{"data":{"path":"data/articles.csv","factory":"CSVTable"}},"notebook:analisis.ipynb":{"data":{"path":"analisis.ipynb","factory":"Notebook"}}},"metadata":{"id":"default"}}
--- a/12
+++ b/12
@@ -0,0 +1,12 @@
 # Image for running the analysis notebook: starts from the Jupyter
 # scipy-notebook image (no tag is pinned, so the build follows whatever the
 # registry currently serves for the default tag).
 FROM quay.io/jupyter/scipy-notebook
 # Install in the default python3 environment
 # fix-permissions is re-run on CONDA_DIR and the user's home after each pip
 # install. NOTE(review): presumably this keeps files written by pip usable by
 # the notebook user/group -- confirm against the base image documentation.
 RUN pip install --no-cache-dir 'flake8' && \
  fix-permissions "${CONDA_DIR}" && \
  fix-permissions "/home/${NB_USER}"
 # Install from the requirements.txt file
 # The file is copied to /tmp owned by NB_UID:NB_GID, then installed with the
 # pip cache disabled, followed by the same permission fix-up as above.
 COPY --chown=${NB_UID}:${NB_GID} requirements.txt /tmp/
 RUN pip install --no-cache-dir --requirement /tmp/requirements.txt && \
  fix-permissions "${CONDA_DIR}" && \
  fix-permissions "/home/${NB_USER}"
--- a/README.md
+++ b/README.md
@@ -1,2 +1,94 @@
-# how-gen-ai-affects-researchers
+import pandas as pd
 import seaborn as sns
 from wordcloud import WordCloud
 import matplotlib.pyplot as plt
 from IPython.display import display, Markdown
 from collections import Counter
 from itertools import islice
 articles = pd.read_csv('data/articles.csv')# how-gen-ai-affects-researchers
 ## Most common tags
 # Each row stores its tags as one ';'-separated string in the 'Manual Tags'
 # and 'Automatic Tags' columns. Rows without tags are dropped and the text is
 # lower-cased so that differently-cased spellings are counted together.
 man_tags = articles['Manual Tags'].dropna().str.lower()
 auto_tags = articles['Automatic Tags'].dropna().str.lower()
 # Flatten both columns (manual tags first) into one list of stripped tags.
 # Empty strings -- produced by a trailing or doubled ';' -- are skipped so a
 # blank "tag" can never show up in the ranking or the word cloud.
 tags = [
     tag
     for column in (man_tags, auto_tags)
     for tag in column.str.split(';').explode().str.strip()
     if tag
 ]
 c = Counter(tags)
 # most_common() yields (tag, count) pairs, most frequent first. Every entry
 # is written as "1." because Markdown renumbers ordered lists on render.
 text = '## Top 50 common tags:\n\n' + ''.join(
     f"1. {tag} ({count})\n" for tag, count in c.most_common(50)
 )
 display(Markdown(text))
 # Draw the tag counts as a word cloud: white background, rendered at 8x scale,
 # shown on a square 10x10 figure with the axes switched off.
 wc = WordCloud(background_color="white", scale=8)
 wc.generate_from_frequencies(c)  # fills `wc` in place (the call returns `wc` itself)
 plt.figure(figsize=(10, 10))
 plt.imshow(wc)
 plt.axis("off")
 plt.show()
 ## Title
 # Lower-cased article titles, with missing values removed.
 titles = articles['Title'].dropna().str.lower()
 # Build one word cloud from all titles joined into a single text, then show
 # it on a square 10x10 figure with the axes switched off.
 tc = WordCloud(background_color="white", scale=8)
 tc.generate(" ".join(titles))  # fills `tc` in place (the call returns `tc` itself)
 plt.figure(figsize=(10, 10))
 plt.imshow(tc)
 plt.axis("off")
 plt.show()
 ---
 # Count the words/phrases WordCloud extracts from the joined titles and keep
 # the 50 most frequent. Counter.most_common() sorts by count, descending, and
 # keeps first-seen order for ties -- the same order the previous
 # sorted(..., reverse=True) + islice chain produced.
 title_freqs = WordCloud(scale=8, background_color="white").process_text(" ".join(titles))
 tc_words = dict(Counter(title_freqs).most_common(50))
 # Every entry is written as "1." because Markdown renumbers ordered lists.
 top_title = "### Top 50 phrase in title\n\n" + "".join(
     f"1. {word} ({freq})\n" for word, freq in tc_words.items()
 )
 display(Markdown(top_title))
 ---
 ## Abstract
 # Lower-cased abstracts, with missing values removed.
 abstracts = articles['Abstract Note'].dropna().str.lower()
 # Build one word cloud from all abstracts joined into a single text, then
 # show it on a square 10x10 figure with the axes switched off.
 ac = WordCloud(background_color="white", scale=8)
 ac.generate(" ".join(abstracts))  # fills `ac` in place (the call returns `ac` itself)
 plt.figure(figsize=(10, 10))
 plt.imshow(ac)
 plt.axis("off")
 plt.show()
 ---
 # Count the words/phrases WordCloud extracts from the joined abstracts and
 # keep the 50 most frequent. Counter.most_common() sorts by count, descending,
 # and keeps first-seen order for ties -- the same order the previous
 # sorted(..., reverse=True) + islice chain produced.
 abstract_freqs = WordCloud(scale=8, background_color="white").process_text(" ".join(abstracts))
 ac_words = dict(Counter(abstract_freqs).most_common(50))
 # Every entry is written as "1." because Markdown renumbers ordered lists.
 top_abstract = "### Top 50 phrase in abstract\n\n" + "".join(
     f"1. {word} ({freq})\n" for word, freq in ac_words.items()
 )
 display(Markdown(top_abstract))
 ---
 ## Journal
 # Lower-cased publication (journal) titles, with missing values removed.
 pub = articles['Publication Title'].dropna().str.lower()
 # Build one word cloud from all publication titles joined into a single text,
 # then show it on a square 10x10 figure with the axes switched off.
 pc = WordCloud(background_color="white", scale=8)
 pc.generate(" ".join(pub))  # fills `pc` in place (the call returns `pc` itself)
 plt.figure(figsize=(10, 10))
 plt.imshow(pc)
 plt.axis("off")
 plt.show()
 ---
 # Count the words/phrases WordCloud extracts from the joined publication
 # titles and keep the 50 most frequent. Counter.most_common() sorts by count,
 # descending, and keeps first-seen order for ties -- the same order the
 # previous sorted(..., reverse=True) + islice chain produced.
 pub_freqs = WordCloud(scale=8, background_color="white").process_text(" ".join(pub))
 pc_words = dict(Counter(pub_freqs).most_common(50))
 # Every entry is written as "1." because Markdown renumbers ordered lists.
 top_pub = "### Top 50 phrase in publication title\n\n" + "".join(
     f"1. {word} ({freq})\n" for word, freq in pc_words.items()
 )
 display(Markdown(top_pub))
--- a/data/articles.csv
+++ b/data/articles.csv
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,2 @@
 nltk==3.9.1
 wordcloud==1.9.4
		`@@ -0,0 +1 @@`
							{"data":{"layout-restorer:data":{"main":{"dock":{"type":"tab-area","currentIndex":2,"widgets":["notebook:analisis.ipynb","csvviewer:data/articles.csv"]},"current":"csvviewer:data/articles.csv"},"down":{"size":0,"widgets":[]},"left":{"collapsed":false,"visible":true,"current":"filebrowser","widgets":["filebrowser","running-sessions","git-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"],"widgetStates":{"jp-running-sessions":{"sizes":[0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666],"expansionStates":[false,false,false,false,false,false]},"extensionmanager.main-view":{"sizes":[0.5,0.5],"expansionStates":[false,false]}}},"right":{"collapsed":true,"visible":true,"widgets":["jp-property-inspector","debugger-sidebar"],"widgetStates":{"jp-debugger-sidebar":{"sizes":[0.2,0.2,0.2,0.2,0.2],"expansionStates":[false,false,false,false,false]}}},"relativeSizes":[0.1663353214049039,0.8336646785950961,0],"top":{"simpleVisibility":true}},"docmanager:recents":{"opened":[{"path":"data","contentType":"directory","root":"/home/jovyan"},{"path":"data/articles.csv","contentType":"file","factory":"CSVTable","root":"/home/jovyan"},{"path":"","contentType":"directory","root":"/home/jovyan"},{"path":"analisis.ipynb","contentType":"notebook","factory":"Notebook","root":"/home/jovyan"}],"closed":[]},"file-browser-filebrowser:cwd":{"path":"data"},"csvviewer:data/articles.csv":{"data":{"path":"data/articles.csv","factory":"CSVTable"}},"notebook:analisis.ipynb":{"data":{"path":"analisis.ipynb","factory":"Notebook"}}},"metadata":{"id":"default"}}