get unsynced symbol-asOfDate combos in batches

use `clickhouse.sakal.us` instead of `kubectl port-forward`
This commit is contained in:
Avraham Sakal
2024-02-25 15:18:15 -05:00
parent 5c8a54b4b5
commit dabfec86a1
6 changed files with 45 additions and 23 deletions
@@ -52,8 +52,7 @@ async function getOptionContracts(underlyingSymbol, asOfDate){
});
}
//await getOptionContracts('AAPL','2024-01-30');
async function getNextUnstartedSymbolAndAsOfDate(previousUnstartedSymbolAndAsOfDate:{symbol:string, asOfDate:string}){
async function getNextBatchOfUnstartedSymbolsAndAsOfDates(previousUnstartedSymbolAndAsOfDate:{symbol:string, asOfDate:string}, limit:number){
const rows = await query<{symbol:string, earliestAsOfDate:string}>(`
SELECT
symbol,
@@ -81,17 +80,12 @@ async function getNextUnstartedSymbolAndAsOfDate(previousUnstartedSymbolAndAsOfD
symbol > '${previousUnstartedSymbolAndAsOfDate.symbol}'
)
ORDER BY symbol ASC
LIMIT 1
LIMIT ${limit}
`);
if(rows.length === 0){
return null;
}
else{
return {
symbol: rows[0].symbol,
asOfDate: rows[0].earliestAsOfDate,
}
}
return rows.map(row=>({
symbol: row.symbol,
asOfDate: row.earliestAsOfDate,
}));
}
/**
@@ -139,14 +133,28 @@ async function fillSyncStatuses(){
);
}
/** First, make sure we know which symbol-asOfDate combinations are
* not yet synced.
*/
await fillSyncStatuses();
/** Second, for each symbol-asOfDate combination whose option contracts
* are not known, make them known.
*
* This queries Polygon with a concurrency of 6.
*/
const q = new pQueue({concurrency: 6});
let nextUnstartedSymbolAndAsOfDate = {symbol:'A', asOfDate:'2022-02-01'};
while((nextUnstartedSymbolAndAsOfDate = await getNextUnstartedSymbolAndAsOfDate(nextUnstartedSymbolAndAsOfDate)) !== null){
await q.add(async ()=>{
console.log(`Getting contracts for ${nextUnstartedSymbolAndAsOfDate.symbol} at ${nextUnstartedSymbolAndAsOfDate.asOfDate}`);
await getOptionContracts(nextUnstartedSymbolAndAsOfDate.symbol, nextUnstartedSymbolAndAsOfDate.asOfDate);
});
/** Initialized with the lowest possible symbol and the earliest possible asOfDate.
* Its last element is popped and passed into `getNextBatchOfUnstartedSymbolsAndAsOfDates()`.
*/
let nextBatchOfUnstartedSymbolsAndAsOfDates = [{symbol:'A', asOfDate:'2022-02-01'}];
while((nextBatchOfUnstartedSymbolsAndAsOfDates = await getNextBatchOfUnstartedSymbolsAndAsOfDates(nextBatchOfUnstartedSymbolsAndAsOfDates.pop(), 200)) !== null){
await pAll(nextBatchOfUnstartedSymbolsAndAsOfDates.map((unstartedSymbolAndAsOfDate)=>
()=>q.add(async ()=>{
console.log(`Getting contracts for ${unstartedSymbolAndAsOfDate.symbol} at ${unstartedSymbolAndAsOfDate.asOfDate}`);
await getOptionContracts(unstartedSymbolAndAsOfDate.symbol, unstartedSymbolAndAsOfDate.asOfDate);
})
));
// don't loop again until the queue has fewer than 50 items; we don't want it to grow in memory without bound:
console.log("Waiting till less than 50 in queue");
await q.onSizeLessThan(50);