Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 234 lines (189 sloc) 6.97 KB
#!/usr/bin/env python3
import sys
import csv
import argparse
import pprint
from scholarly import scholarly
from collections import defaultdict
#from scholarly import ProxyGenerator
###############
# TODO
###############
# Add caching with a sqlite3 database
# Look for unique identifiers for authors
# Take name data from stdin
# Can be fired off from a shell like:
# while read -r line; do ./scholar -p "$line"; done < names |grep -v "^\"Year\",\"Author\",\"Title\",\"Journal\",\"Citations\"" |tee output
# Set up a ProxyGenerator object to use free proxies
# This needs to be done only once per session
#pg = ProxyGenerator()
#pg.FreeProxies()
#scholarly.use_proxy(pg)
# Get the author indexes. arg is author_name, return is list
def get_author_indexes(author_name):
    """Return the citation indexes Google Scholar reports for an author.

    The author is searched by name and the first match is used.

    Args:
        author_name: Name to search for.

    Returns:
        A one-element list. On success the element is
        ``[citedby, citedby5y, hindex, hindex5y, i10index, i10index5y,
        scholar_id, name]``; numeric fields missing from the record default
        to 0 and a missing name to ``'No name available'``. If no author
        matches, the element is ``(-1, -1, author_name)``. If the lookup
        raises, the element is ``(-2, -2, author_name)``.
    """
    try:
        author = next(scholarly.search_author(author_name), None)
        if author is not None:
            # fill() performs its own request, so it has to be guarded too;
            # otherwise a failure here ends in a traceback instead of the
            # -2 sentinel row.
            author = scholarly.fill(author)
    except Exception:
        # Exception (not a bare except) so Ctrl-C and sys.exit still work.
        print("Error: Connection failed. Try again another time. You might be blocked.")
        return [(-2, -2, author_name)]

    if author is None:
        return [(-1, -1, author_name)]  # No author found

    numeric_fields = (
        'citedby', 'citedby5y',
        'hindex', 'hindex5y',
        'i10index', 'i10index5y',
        'scholar_id',
    )
    row = [author.get(field, 0) for field in numeric_fields]
    row.append(author.get('name', 'No name available'))
    return [row]
# Get the publications per year. arg is author_name, return is list
def get_publications_per_year(author_name):
    """Count an author's publications per publication year.

    The author is searched by name and the first match is used.

    Args:
        author_name: Name to search for.

    Returns:
        A list of ``[year, count, name]`` rows sorted by year. Publications
        without a ``pub_year`` are grouped under ``'no-year-available'``.
        If no author matches, the list is ``[(-1, -1, author_name)]``. If
        the lookup raises, it is ``[(-2, -2, author_name)]``.
    """
    try:
        author = next(scholarly.search_author(author_name), None)
        if author is not None:
            # fill() performs its own request, so it has to be guarded too;
            # otherwise a failure here ends in a traceback instead of the
            # -2 sentinel row.
            author = scholarly.fill(author)
    except Exception:
        # Exception (not a bare except) so Ctrl-C and sys.exit still work.
        print("Error: Connection failed. Try again another time. You might be blocked.")
        return [(-2, -2, author_name)]

    if author is None:
        return [(-1, -1, author_name)]  # No author found

    name = author.get('name', 'No name available')

    # Tally publications by year.
    per_year = defaultdict(int)
    for pub in author['publications']:
        per_year[pub['bib'].get('pub_year', 'no-year-available')] += 1

    return [[year, count, name] for year, count in sorted(per_year.items())]
# Get the citations per year. arg is author_name, return is list
def get_citations_per_year(author_name, byid=False):
    """Return the number of citations an author received in each year.

    Args:
        author_name: Name to search for, or a Google Scholar author ID when
            ``byid`` is true.
        byid: Treat ``author_name`` as a Scholar author ID instead of a
            name. Defaults to False, i.e. search by name and use the first
            match.

    Returns:
        A list of ``[year, citations, name]`` rows sorted by year. If no
        author matches, the list is ``[(-1, -1, author_name)]``. If the
        lookup raises, it is ``[(-2, -2, author_name)]``.
    """
    # ``byid`` used to be read as a global that was never defined; the
    # resulting NameError was swallowed by a bare except and reported as a
    # connection failure on every call. It is now an explicit parameter.
    try:
        if byid:
            # search_author_id returns the author record itself, not an
            # iterator of matches, so it must not be passed through next().
            author = scholarly.search_author_id(author_name)
        else:
            author = next(scholarly.search_author(author_name), None)
        if not author:
            return [(-1, -1, author_name)]  # No author found
        author = scholarly.fill(author)
    except Exception:
        # Exception (not a bare except) so Ctrl-C and sys.exit still work.
        print("Error: Connection failed. Try again another time. You might be blocked.")
        return [(-2, -2, author_name)]

    name = author.get('name', 'No name available')
    yearly = author.get('cites_per_year', {})
    return [[year, count, name] for year, count in sorted(yearly.items())]
# Get the publications. arg is author_name, return is list
def get_author_publications(author_name, byid=False):
    """Return one row per publication listed for an author.

    Args:
        author_name: Name to search for, or a Google Scholar author ID when
            ``byid`` is true.
        byid: Treat ``author_name`` as a Scholar author ID instead of a
            name. Defaults to False, i.e. search by name and use the first
            match.

    Returns:
        A list of ``(year, name, title, journal, num_citations)`` tuples.
        Fields missing from a publication fall back to the
        ``'No ... available'`` placeholders (0 for the citation count).
        If no author matches, the list is
        ``[(-1, author_name, -1, -1, -1)]``. If the lookup raises, it is
        ``[(-2, author_name, -2, -2, -2)]``.
    """
    # ``byid`` used to be read as a global that was never defined; the
    # resulting NameError was swallowed by a bare except and reported as a
    # connection failure on every call. It is now an explicit parameter.
    try:
        if byid:
            # search_author_id returns the author record itself, not an
            # iterator of matches, so it must not be passed through next().
            author = scholarly.search_author_id(author_name)
        else:
            author = next(scholarly.search_author(author_name), None)
        if not author:
            # Sentinel rows are five wide so they line up with the data
            # rows (year, author, title, journal, citations).
            return [(-1, author_name, -1, -1, -1)]  # No author found
        author = scholarly.fill(author)
    except Exception:
        # Exception (not a bare except) so Ctrl-C and sys.exit still work.
        print("Error: Connection failed. Try again another time. You might be blocked.")
        return [(-2, author_name, -2, -2, -2)]

    name = author.get('name', 'No name available')
    return [
        (
            pub['bib'].get('pub_year', 'No year available'),
            name,
            pub['bib'].get('title', 'No title available'),
            pub['bib'].get('citation', 'No journal available'),
            pub.get('num_citations', 0),
        )
        for pub in author['publications']
    ]
# handle args
def processargs():
    """Parse the command line and return the resulting namespace.

    Recognised switches are -c/--citation, -i/--indexes, -n/--noheaders,
    -p/--publications and -y/--year, plus one optional positional
    ``author_name``. The help text is printed and the process exits with
    status 1 when the script is run with no arguments at all, or when no
    author name was supplied.
    """
    parser = argparse.ArgumentParser()

    # (short flag, long flag, help text) for every boolean switch.
    switches = (
        ('-c', '--citation', 'Get citations per year'),
        ('-i', '--indexes', 'Get author indexes'),
        ('-n', '--noheaders', 'Do not print headers'),
        ('-p', '--publications', 'Get publications'),
        ('-y', '--year', 'Get publications per year'),
    )
    for short_flag, long_flag, description in switches:
        parser.add_argument(short_flag, long_flag, action='store_true', help=description)
    parser.add_argument('author_name', nargs='?')

    def show_help_and_quit():
        parser.print_help()
        sys.exit(1)

    # Nothing at all on the command line: show usage instead of parsing.
    if len(sys.argv) == 1:
        show_help_and_quit()

    parsed = parser.parse_args()

    # Switches were given but there is nobody to look up.
    if parsed.author_name is None:
        show_help_and_quit()

    return parsed
def main():
    """Run the reports selected on the command line and print them as CSV.

    Every selected report (-c, -i, -p, -y) is fetched for
    ``args.author_name`` and written to stdout with all fields quoted, in
    the fixed order citations, indexes, publications, publications per
    year. Each report is preceded by its header row unless -n/--noheaders
    was given.
    """
    args = processargs()
    csvout = csv.writer(sys.stdout, quoting=csv.QUOTE_ALL)

    # (selected?, fetch function, header row), in output order.
    reports = (
        (args.citation, get_citations_per_year,
         ['Year', 'Num Citations', 'Author']),
        # get_author_indexes rows carry eight values: the scholar id sits
        # between the i10 indexes and the name, so the header names it too
        # (it was missing, which put 'Author' over the id column).
        (args.indexes, get_author_indexes,
         ['Citedby', 'Citedby5y', 'Hindex', 'Hindex5y',
          'I10index', 'I10index5y', 'Scholar ID', 'Author']),
        (args.publications, get_author_publications,
         ['Year', 'Author', 'Title', 'Journal', 'Citations']),
        (args.year, get_publications_per_year,
         ['Year', 'Num Publications', 'Author']),
    )

    for selected, fetch, header in reports:
        if not selected:
            continue
        # Fetch before printing the header, as before, so any message the
        # fetch prints still precedes the header row.
        rows = fetch(args.author_name)
        if not args.noheaders:
            csvout.writerow(header)
        csvout.writerows(rows)
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()