
Add scraping and gitlab scripts (#206)

Co-authored-by: Yethal <nosuchemail@email.com>
Yethal 2022-04-14 20:56:09 +02:00 committed by GitHub
parent 34c241172b
commit 4a1ccf0e43
4 changed files with 76 additions and 0 deletions

gitlab/README.md Normal file

@@ -0,0 +1,13 @@
# GitLab Scanner
### Definition
I use this script to scan the contents of my company's GitLab server. Thanks to nushell's multithreading, it can scan around 1k repositories in about 9 seconds.
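The parallelism comes from `par-each`, which runs a closure over each input item on a thread pool. A minimal sketch of the idea, independent of any GitLab specifics:

```nu
# run the closure for every number in parallel and collect the results into one list
seq 1 10 | par-each {|n| $"processed item ($n)" }
```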
### Setup
1. Generate a GitLab Personal Access Token and save it in the `GITLAB_TOKEN` environment variable
2. Run the script, providing the necessary data as arguments (or hardcode them in the script if you don't expect them to change often); see the example below
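A hypothetical invocation (the URL, token, file, phrase, and branch values are placeholders):

```nu
# set the token for the current session, then run the scan
let-env GITLAB_TOKEN = 'glpat-xxxxxxxxxxxxxxxxxxxx'
./gitlab.nu --base_url 'https://gitlab.example.com' --file '.gitlab-ci.yml' --phrase 'docker' --branch 'main'
```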
### Possible future improvements
* Multiple files/phrases/branches to search
* Maybe create some stats like how many times a given phrase was found in a repo or file
* Maybe offer an option to replace a phrase and automatically push the updated file or create a merge request

gitlab/gitlab.nu Executable file

@@ -0,0 +1,38 @@
#!/usr/bin/env nu
let page_size = 100    # GitLab returns at most 100 items per page

# Call the /projects API of the given GitLab instance, authenticating
# with the token stored in the GITLAB_TOKEN environment variable
def call-gitlab [
    base_url: string    # base url of the GitLab instance
    ...args: string     # path segments appended to the /projects endpoint
    --query: string     # query string parameters
] {
    fetch -H [Authorization $"Bearer ($env.GITLAB_TOKEN)"] $"($base_url)/api/v4/projects/($args | str collect)?($query)"
}

# Search files on your GitLab server
def main [
    --base_url: string # base url of your GitLab instance
    --file: string # file (or path to file if in a subfolder) you want to scan
    --phrase: string # phrase you want to search for
    --branch: string # branch to scan
] {
    # The /projects endpoint returns at most $page_size items per call, so we need
    # multiple calls to retrieve the full list. Ordering by id (descending) makes
    # the first item's id the highest one; dividing it by $page_size over-estimates
    # the page count when ids have gaps, which is harmless. math ceil keeps the
    # last, partially filled page from being dropped.
    let num_pages = ((call-gitlab $base_url --query 'page=1&per_page=1&order_by=id&simple=true' | get id.0 | into int) / $page_size | math ceil)
    seq 1 $num_pages
    | par-each {|page|
        call-gitlab $base_url --query $"page=($page)&per_page=($page_size)" | select name id
    }
    | flatten
    | par-each {|repo|
        # note: $file must be url-encoded if it points into a subfolder (%2F instead of /)
        let payload = (call-gitlab $base_url ($repo.id | into string) '/repository/files/' $file --query $"ref=($branch)")
        # when the file is missing, the API returns an error message instead of content
        if ($payload | columns | find message | empty?) {
            let matches = ($payload | get content | hash base64 --decode | lines | find $phrase)
            if ($matches | length) > 0 {
                echo $"($file) in ($repo.name) repo contains ($phrase) phrase"
            }
        }
    }
}

webscraping/README.md Normal file

@@ -0,0 +1,5 @@
# Web Scraping
### Definition
Simple scripts demonstrating how to scrape websites in nushell. Requires the `query web` plugin.
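A minimal sketch of the plugin in action (the URL and selector are illustrative only, not taken from the script below):

```nu
# fetch a page and extract the text of every <h2> heading
fetch https://example.com | query web -q 'h2'
```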

webscraping/nuschiit.nu Executable file

@@ -0,0 +1,20 @@
#!/usr/bin/env nu
let baseurl = 'https://www.schiit.co.uk/'
# category pages to scan for products
let pages = ['headphone-amps' 'dacs' 'schiit-gaming-products' 'power-amplifiers' 'preamps' 'upgrades' 'accessories-cables' 'schiit%20graded%20stock']
# Simple script to check stock of the https://schiit.co.uk store
def main [] {
    $pages
    | par-each {|page|
        fetch $"($baseurl)($page)"
        | query web -q 'div.caption' -m    # grab every product card on the page
        | par-each {|item|
            $item
            | query web -q 'p.stock, h5'    # product name (h5) and stock status (p.stock)
            | rotate --ccw name availability
        }
        | flatten
    }
    | flatten
    | uniq    # drop duplicate rows
    | sort-by availability
}
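A possible way to reuse the script interactively; the stock wording in the filter is a guess, since the site's exact strings are not shown here:

```nu
# load the script's definitions, then keep only items that look in stock
source nuschiit.nu
main | where availability =~ 'In Stock'
```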