mirror of
https://github.com/RGBCube/nu_scripts
synced 2025-08-01 22:57:46 +00:00
Add scraping and gitlab scripts (#206)
Co-authored-by: Yethal <nosuchemail@email.com>
This commit is contained in:
parent
34c241172b
commit
4a1ccf0e43
4 changed files with 76 additions and 0 deletions
13
gitlab/README.md
Normal file
13
gitlab/README.md
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Gitlab Scanner
|
||||
|
||||
### Definition
|
||||
|
||||
I use this script to scan the contents of my company's GitLab server. Thanks to nushell's use of multithreading, I'm able to scan around 1k repositories in about 9 seconds.
|
||||
|
||||
### Setup
|
||||
1. Generate a GitLab Personal Access Token and save it in the `GITLAB_TOKEN` environment variable
|
||||
2. Run the script providing necessary data as arguments (or hardcode them in the script if you don't expect them to change often)
|
||||
### Possible future improvements
|
||||
* Multiple files/phrases/branches to search
|
||||
* Maybe create some stats like how many times a given phrase was found in a repo or file
|
||||
* Maybe offer an option to replace a phrase and automatically push the updated file or create a merge request
|
38
gitlab/gitlab.nu
Executable file
38
gitlab/gitlab.nu
Executable file
|
@ -0,0 +1,38 @@
|
|||
#!/usr/bin/env nu

# Scan every repository on a GitLab server for a phrase in a given file.
# Authentication: expects a personal access token in $env.GITLAB_TOKEN.

# The /projects endpoint returns at most this many items per call,
# which is why the full project list is fetched page by page.
let page_size = 100

# Call the /projects API of a GitLab instance.
#
# NOTE(review): the original script built the projects URL in a top-level
# `let` from `$base_url`, but `$base_url` is a flag of `main` and is not in
# scope at the top level, so running the script with `--base_url` (as the
# README instructs) could not work. The base url is now passed explicitly.
def call-gitlab [
    base_url: string    # base url of the GitLab instance
    ...args: string     # extra path segments appended after /projects/
    --query: string     # query string (without the leading '?')
] {
    fetch -H [Authorization $"Bearer ($env.GITLAB_TOKEN)"] $"($base_url)/api/v4/projects/($args|str collect)?($query)"
}

# Search files on your GitLab server
def main [
    --base_url: string # base url of your GitLab instance
    --file: string # file (or path to file if in a subfolder) you want to scan
    --phrase: string # phrase you want to search for
    --branch: string # branch to scan
] {
    # /projects endpoint can return up to $page_size items which is why we need multiple calls to retrieve full list.
    # The highest project id (order_by=id, one item) approximates the project count.
    let num-of-pages = ((call-gitlab $base_url --query 'page=1&per_page=1&order_by=id&simple=true'|get id.0|into int) / $page_size|math round)
    seq 1 $num-of-pages|par-each {|page|
        call-gitlab $base_url --query $"page=($page)&per_page=($page_size)"|select name id
    }
    |flatten
    |par-each {|repo|
        let payload = (call-gitlab $base_url $repo.id '/repository/files/' $file --query $"ref=($branch)")
        # An error response carries a `message` column; only decode real file payloads.
        if ($payload|columns|find message|empty?) {
            $payload
            |get content
            |hash base64 --decode
            |lines
            |find $phrase
            |if ($in|length) > 0 {
                echo $"($file) in ($repo.name) repo contains ($phrase) phrase"
            }
        }
    }
}
|
5
webscraping/README.md
Normal file
5
webscraping/README.md
Normal file
|
@ -0,0 +1,5 @@
|
|||
# Web Scraping
|
||||
|
||||
### Definition
|
||||
|
||||
Simple scripts to demonstrate how to scrape websites in nushell. Requires the `query web` plugin.
|
20
webscraping/nuschiit.nu
Executable file
20
webscraping/nuschiit.nu
Executable file
|
@ -0,0 +1,20 @@
|
|||
#!/usr/bin/env nu

# Store root and the category pages whose stock we want to check.
let baseurl = 'https://www.schiit.co.uk/'
let pages = ['headphone-amps' 'dacs' 'schiit-gaming-products' 'power-amplifiers' 'preamps' 'upgrades' 'accessories-cables' 'schiit%20graded%20stock']

# Fetch one category page and turn each product caption into a
# {name, availability} row. Requires the `query web` plugin.
def scan-page [category: string] {
    fetch $"($baseurl)($category)"
    |query web -q 'div.caption' -m
    |par-each {|caption|
        $caption
        |query web -q 'p.stock, h5'
        |rotate --ccw name availability
    }
    |flatten
}

# Simple script to check stock of https://schiit.co.uk store
def main [] {
    $pages
    |par-each {|category| scan-page $category }
    |flatten
    |uniq
    |sort-by availability
}
|
Loading…
Add table
Add a link
Reference in a new issue